[ML] Also chunk aggregated datafeed by default (elastic/x-pack-elasticsearch#999)

The change applies chunking by default to aggregated datafeeds.
Chunking is set to manual mode with a time_span equal to
1000 histogram buckets.
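
For example, with a date_histogram whose bucket interval is 5m, the
default time_span works out to 1000 x 5m = 5000 minutes, so each
chunked search covers roughly 3.5 days of data.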

The motivation for the change is two-fold:

1. It helps to avoid memory pressure.
Users may run a lookback over a very long period of time. In that
case, we could end up holding a search response that covers the whole
period, which may contain far too many buckets. By chunking, we avoid
that situation, as we know we only ever hold results for 1000 buckets
at a time.

2. It makes cancellation more responsive.
In elastic/x-pack-elasticsearch#862 we made the processing of a search
response cancellable in a responsive manner. However, the search phase
itself cannot be cancelled at the moment. Chunking makes each search
phase shorter, which results in a better user experience when users
stop an aggregated datafeed.

Also note that the change sets the default chunking_config on datafeed
creation, so the setting is no longer hidden.
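
For illustration only (the exact REST rendering of the time span is an
assumption, not something this change asserts): a newly created datafeed
aggregating over a 2h date_histogram would report a chunking_config along
the lines of { "mode": "manual", "time_span": "2000h" }, while a plain
scroll datafeed reports { "mode": "auto" }, as the REST test at the end
of this commit verifies.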

Relates to elastic/x-pack-elasticsearch#803

Original commit: elastic/x-pack-elasticsearch@ae8f120f5f
Dimitris Athanasiou 2017-04-10 18:20:48 +01:00 committed by GitHub
parent 0b6ac175da
commit 1e1b5405b3
6 changed files with 60 additions and 12 deletions

View File: DatafeedConfig.java

@@ -212,7 +212,11 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
      * The method expects a valid top level aggregation to exist.
      */
     public long getHistogramIntervalMillis() {
-        AggregationBuilder topLevelAgg = getTopLevelAgg();
+        return getHistogramIntervalMillis(aggregations);
+    }
+
+    private static long getHistogramIntervalMillis(AggregatorFactories.Builder aggregations) {
+        AggregationBuilder topLevelAgg = getTopLevelAgg(aggregations);
         if (topLevelAgg == null) {
             throw new IllegalStateException("No aggregations exist");
         }
@@ -225,7 +229,7 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
         }
     }
 
-    private AggregationBuilder getTopLevelAgg() {
+    private static AggregationBuilder getTopLevelAgg(AggregatorFactories.Builder aggregations) {
         if (aggregations == null || aggregations.getAggregatorFactories().isEmpty()) {
             return null;
         }
@@ -420,6 +424,7 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
         private static final int DEFAULT_SCROLL_SIZE = 1000;
         private static final TimeValue DEFAULT_QUERY_DELAY = TimeValue.timeValueMinutes(1);
+        private static final int DEFAULT_AGGREGATION_CHUNKING_BUCKETS = 1000;
 
         private String id;
         private String jobId;
@@ -531,6 +536,7 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
                 throw invalidOptionValue(TYPES.getPreferredName(), types);
             }
             validateAggregations();
+            setDefaultChunkingConfig();
             return new DatafeedConfig(id, jobId, queryDelay, frequency, indexes, types, query, aggregations, scriptFields, scrollSize,
                     source, chunkingConfig);
         }
@@ -560,6 +566,18 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
             }
         }
 
+        private void setDefaultChunkingConfig() {
+            if (chunkingConfig == null) {
+                if (aggregations == null) {
+                    chunkingConfig = ChunkingConfig.newAuto();
+                } else {
+                    long histogramIntervalMillis = getHistogramIntervalMillis(aggregations);
+                    chunkingConfig = ChunkingConfig.newManual(TimeValue.timeValueMillis(
+                            DEFAULT_AGGREGATION_CHUNKING_BUCKETS * histogramIntervalMillis));
+                }
+            }
+        }
+
         private static ElasticsearchException invalidOptionValue(String fieldName, Object value) {
             String msg = Messages.getMessage(Messages.DATAFEED_CONFIG_INVALID_OPTION_VALUE, fieldName, value);
             throw new IllegalArgumentException(msg);
View File: DataExtractorFactory.java

@@ -6,7 +6,6 @@
 package org.elasticsearch.xpack.ml.datafeed.extractor;
 
 import org.elasticsearch.client.Client;
-import org.elasticsearch.xpack.ml.datafeed.ChunkingConfig;
 import org.elasticsearch.xpack.ml.datafeed.DatafeedConfig;
 import org.elasticsearch.xpack.ml.datafeed.extractor.aggregation.AggregationDataExtractorFactory;
 import org.elasticsearch.xpack.ml.datafeed.extractor.chunked.ChunkedDataExtractorFactory;
@@ -23,12 +22,7 @@ public interface DataExtractorFactory {
         boolean isScrollSearch = datafeedConfig.hasAggregations() == false;
         DataExtractorFactory dataExtractorFactory = isScrollSearch ? new ScrollDataExtractorFactory(client, datafeedConfig, job)
                 : new AggregationDataExtractorFactory(client, datafeedConfig, job);
-        ChunkingConfig chunkingConfig = datafeedConfig.getChunkingConfig();
-        if (chunkingConfig == null) {
-            chunkingConfig = isScrollSearch ? ChunkingConfig.newAuto() : ChunkingConfig.newOff();
-        }
-        return chunkingConfig.isEnabled() ? new ChunkedDataExtractorFactory(client, datafeedConfig, job, dataExtractorFactory)
-                : dataExtractorFactory;
+        return datafeedConfig.getChunkingConfig().isEnabled() ? new ChunkedDataExtractorFactory(
+                client, datafeedConfig, job, dataExtractorFactory) : dataExtractorFactory;
     }
 }
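
Since the builder now always sets a chunking_config, getChunkingConfig()
can no longer return null here, which is what allows the null handling
(and the ChunkingConfig import) to be deleted. The resulting behavior of
create(), summarized as comments (a sketch of the decision, not code
from the commit):

    // no aggregations   -> default auto chunking   -> ChunkedDataExtractorFactory wrapping ScrollDataExtractorFactory
    // aggregations      -> default manual chunking -> ChunkedDataExtractorFactory wrapping AggregationDataExtractorFactory
    // chunking mode off -> the inner extractor factory is returned unwrapped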

View File: ChunkedDataExtractorFactory.java

@@ -38,7 +38,7 @@ public class ChunkedDataExtractorFactory implements DataExtractorFactory {
                 datafeedConfig.getScrollSize(),
                 start,
                 end,
-                datafeedConfig.getChunkingConfig() == null ? null : datafeedConfig.getChunkingConfig().getTimeSpan());
+                datafeedConfig.getChunkingConfig().getTimeSpan());
         return new ChunkedDataExtractor(client, dataExtractorFactory, dataExtractorContext);
     }
 }

View File: DatafeedConfigTests.java

@@ -292,6 +292,20 @@ public class DatafeedConfigTests extends AbstractSerializingTestCase<DatafeedCon
         assertThat(e.getMessage(), containsString("When specifying a date_histogram calendar interval [8d]"));
     }
 
+    public void testDefaultChunkingConfig_GivenAggregations() {
+        assertThat(createDatafeedWithDateHistogram("1s").getChunkingConfig(),
+                equalTo(ChunkingConfig.newManual(TimeValue.timeValueSeconds(1000))));
+        assertThat(createDatafeedWithDateHistogram("2h").getChunkingConfig(),
+                equalTo(ChunkingConfig.newManual(TimeValue.timeValueHours(2000))));
+    }
+
+    public void testChunkingConfig_GivenExplicitSetting() {
+        DatafeedConfig.Builder builder = new DatafeedConfig.Builder(createDatafeedWithDateHistogram("30s"));
+        builder.setChunkingConfig(ChunkingConfig.newAuto());
+        assertThat(builder.build().getChunkingConfig(), equalTo(ChunkingConfig.newAuto()));
+    }
+
     public static String randomValidDatafeedId() {
         CodepointSetGenerator generator = new CodepointSetGenerator("abcdefghijklmnopqrstuvwxyz".toCharArray());
         return generator.ofCodePointsLength(random(), 10, 10);
View File: DataExtractorFactoryTests.java

@@ -74,7 +74,7 @@ public class DataExtractorFactoryTests extends ESTestCase {
         assertThat(dataExtractorFactory, instanceOf(ScrollDataExtractorFactory.class));
     }
 
-    public void testCreateDataExtractorFactoryGivenAggregation() {
+    public void testCreateDataExtractorFactoryGivenDefaultAggregation() {
         DataDescription.Builder dataDescription = new DataDescription.Builder();
         dataDescription.setTimeField("time");
         Job.Builder jobBuilder = DatafeedManagerTests.createDatafeedJob();
@@ -86,6 +86,22 @@ public class DataExtractorFactoryTests extends ESTestCase {
         DataExtractorFactory dataExtractorFactory =
                 DataExtractorFactory.create(client, datafeedConfig.build(), jobBuilder.build(new Date()));
         assertThat(dataExtractorFactory, instanceOf(ChunkedDataExtractorFactory.class));
     }
 
+    public void testCreateDataExtractorFactoryGivenAggregationWithOffChunk() {
+        DataDescription.Builder dataDescription = new DataDescription.Builder();
+        dataDescription.setTimeField("time");
+        Job.Builder jobBuilder = DatafeedManagerTests.createDatafeedJob();
+        jobBuilder.setDataDescription(dataDescription);
+        DatafeedConfig.Builder datafeedConfig = DatafeedManagerTests.createDatafeedConfig("datafeed1", "foo");
+        datafeedConfig.setChunkingConfig(ChunkingConfig.newOff());
+        datafeedConfig.setAggregations(AggregatorFactories.builder().addAggregator(
+                AggregationBuilders.histogram("time").interval(300000)));
+        DataExtractorFactory dataExtractorFactory =
+                DataExtractorFactory.create(client, datafeedConfig.build(), jobBuilder.build(new Date()));
+        assertThat(dataExtractorFactory, instanceOf(AggregationDataExtractorFactory.class));
+    }

View File

@@ -70,6 +70,12 @@ setup:
           "types":["type-bar"]
         }
 
   - match: { datafeed_id: "test-datafeed-1" }
+  - match: { job_id: "job-1" }
+  - match: { indexes: ["index-foo"] }
+  - match: { types: ["type-bar"] }
+  - match: { scroll_size: 1000 }
+  - is_true: query.match_all
+  - match: { chunking_config: { mode: "auto" }}
 
 ---
 "Test put datafeed whose id is already taken":