[ML] Require max time aggregation to ensure correct datafeed restart (elastic/x-pack-elasticsearch#948)

Before this change, aggregation datafeeds used the histogram bucket key as the record timestamp that is posted to the job. That meant that the latest_record_timestamp at the end of a datafeed run was the start of the latest seen histogram bucket. Upon continuing the datafeed, the search starts from one millisecond after the latest_record_timestamp. Hence, data may be fetched for a second time. This change requires a max aggregation on the time_field nested in the histogram bucket. It then reads the timestamp from that agg. This ensures datafeed can restart without duplicating data. relates elastic/x-pack-elasticsearch#874 Original commit: elastic/x-pack-elasticsearch@f820efa866
2017-04-04 17:15:44 +01:00 · 2017-04-04 17:15:44 +01:00 · c9834bc826
parent 2153c71e8f
commit c9834bc826
7 changed files with 177 additions and 93 deletions
--- a/plugin/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/aggregation/AggregationDataExtractor.java
+++ b/plugin/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/aggregation/AggregationDataExtractor.java
@ -132,10 +132,6 @@ class AggregationDataExtractor implements DataExtractor {

        Aggregation topAgg = aggsAsList.get(0);
        if (topAgg instanceof Histogram) {
-            if (context.timeField.equals(topAgg.getName()) == false) {
-                throw new IllegalArgumentException("Histogram name [" + topAgg.getName()
-                        + "] does not match time field [" + context.timeField + "]");
-            }
            return ((Histogram) topAgg).getBuckets();
        } else {
            throw new IllegalArgumentException("Top level aggregation should be [histogram]");
@ -149,9 +145,10 @@ class AggregationDataExtractor implements DataExtractor {
        }

        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-        try (AggregationToJsonProcessor processor = new AggregationToJsonProcessor(context.includeDocCount, outputStream)) {
+        try (AggregationToJsonProcessor processor = new AggregationToJsonProcessor(
+                context.timeField, context.includeDocCount, outputStream)) {
            while (histogramBuckets.isEmpty() == false && processor.getKeyValueCount() < BATCH_KEY_VALUE_PAIRS) {
-                processor.process(context.timeField, histogramBuckets.removeFirst());
+                processor.process(histogramBuckets.removeFirst());
            }
            if (histogramBuckets.isEmpty()) {
                hasNext = false;
--- a/plugin/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/aggregation/AggregationToJsonProcessor.java
+++ b/plugin/src/main/java/org/elasticsearch/xpack/ml/datafeed/extractor/aggregation/AggregationToJsonProcessor.java
@ -5,6 +5,7 @@
 */
 package org.elasticsearch.xpack.ml.datafeed.extractor.aggregation;

+import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.lease.Releasable;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.json.JsonXContent;
@ -13,10 +14,10 @@ import org.elasticsearch.search.aggregations.Aggregations;
 import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
 import org.elasticsearch.search.aggregations.bucket.terms.Terms;
 import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation;
+import org.elasticsearch.search.aggregations.metrics.max.Max;
 import org.elasticsearch.search.aggregations.metrics.percentiles.Percentile;
 import org.elasticsearch.search.aggregations.metrics.percentiles.Percentiles;
 import org.elasticsearch.xpack.ml.datafeed.DatafeedConfig;
-import org.joda.time.base.BaseDateTime;

 import java.io.IOException;
 import java.io.OutputStream;
@ -26,18 +27,24 @@ import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;

 /**
 * Processes {@link Aggregation} objects and writes flat JSON documents for each leaf aggregation.
+ * In order to ensure that datafeeds can restart without duplicating data, we require that
+ * each histogram bucket has a nested max aggregation matching the time_field.
 */
 class AggregationToJsonProcessor implements Releasable {

+    private final String timeField;
    private final boolean includeDocCount;
    private final XContentBuilder jsonBuilder;
    private final Map<String, Object> keyValuePairs;
    private long keyValueWrittenCount;

-    AggregationToJsonProcessor(boolean includeDocCount, OutputStream outputStream) throws IOException {
+    AggregationToJsonProcessor(String timeField, boolean includeDocCount, OutputStream outputStream)
+            throws IOException {
+        this.timeField = Objects.requireNonNull(timeField);
        this.includeDocCount = includeDocCount;
        jsonBuilder = new XContentBuilder(JsonXContent.jsonXContent, outputStream);
        keyValuePairs = new LinkedHashMap<>();
@ -53,17 +60,28 @@ class AggregationToJsonProcessor implements Releasable {
     *       <li>{@link Percentiles}</li>
     *   </ul>
     */
-    public void process(String timeField, Histogram.Bucket bucket) throws IOException {
-        Object timestamp = bucket.getKey();
-        if (timestamp instanceof BaseDateTime) {
-            timestamp = ((BaseDateTime) timestamp).getMillis();
-        }
-        keyValuePairs.put(timeField, timestamp);
-        processNestedAggs(bucket.getDocCount(), bucket.getAggregations());
+    public void process(Histogram.Bucket bucket) throws IOException {
+        if (bucket.getDocCount() == 0) {
+            return;
        }

-    private void processNestedAggs(long docCount, Aggregations subAggs) throws IOException {
-        List<Aggregation> aggs = subAggs == null ? Collections.emptyList() : subAggs.asList();
+        Aggregations aggs = bucket.getAggregations();
+        Aggregation timeAgg = aggs == null ? null : aggs.get(timeField);
+        if (timeAgg instanceof Max == false) {
+            throw new IllegalArgumentException("Missing max aggregation for time_field [" + timeField + "]");
+        }
+
+        // We want to handle the max time aggregation only at the bucket level.
+        // So, we add the value here and then remove the aggregation before
+        // processing the rest of the sub aggs.
+        long timestamp = (long) ((Max) timeAgg).value();
+        keyValuePairs.put(timeField, timestamp);
+        List<Aggregation> subAggs = new ArrayList<>(aggs.asList());
+        subAggs.remove(timeAgg);
+        processNestedAggs(bucket.getDocCount(), subAggs);
+    }
+
+    private void processNestedAggs(long docCount, List<Aggregation> aggs) throws IOException {
        if (aggs.isEmpty()) {
            writeJsonObject(docCount);
            return;
@ -92,7 +110,7 @@ class AggregationToJsonProcessor implements Releasable {
    private void processTerms(Terms termsAgg) throws IOException {
        for (Terms.Bucket bucket : termsAgg.getBuckets()) {
            keyValuePairs.put(termsAgg.getName(), bucket.getKey());
-            processNestedAggs(bucket.getDocCount(), bucket.getAggregations());
+            processNestedAggs(bucket.getDocCount(), asList(bucket.getAggregations()));
            keyValuePairs.remove(termsAgg.getName());
        }
    }
@ -137,4 +155,8 @@ class AggregationToJsonProcessor implements Releasable {
    public long getKeyValueCount() {
        return keyValueWrittenCount;
    }
+
+    private static List<Aggregation> asList(@Nullable Aggregations aggs) {
+        return aggs == null ? Collections.emptyList() : aggs.asList();
+    }
 }
--- a/plugin/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/aggregation/AggregationDataExtractorTests.java
+++ b/plugin/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/aggregation/AggregationDataExtractorTests.java
@ -34,6 +34,7 @@ import java.util.stream.Collectors;

 import static org.elasticsearch.xpack.ml.datafeed.extractor.aggregation.AggregationTestUtils.Term;
 import static org.elasticsearch.xpack.ml.datafeed.extractor.aggregation.AggregationTestUtils.createHistogramBucket;
+import static org.elasticsearch.xpack.ml.datafeed.extractor.aggregation.AggregationTestUtils.createMax;
 import static org.elasticsearch.xpack.ml.datafeed.extractor.aggregation.AggregationTestUtils.createTerms;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
@ -90,9 +91,11 @@ public class AggregationDataExtractorTests extends ESTestCase {
    public void testExtraction() throws IOException {
        List<Histogram.Bucket> histogramBuckets = Arrays.asList(
            createHistogramBucket(1000L, 3, Arrays.asList(
+                    createMax("time", 1999),
                    createTerms("airline", new Term("a", 1, "responsetime", 11.0), new Term("b", 2, "responsetime", 12.0)))),
            createHistogramBucket(2000L, 0, Arrays.asList()),
            createHistogramBucket(3000L, 7, Arrays.asList(
+                    createMax("time", 3999),
                    createTerms("airline", new Term("c", 4, "responsetime", 31.0), new Term("b", 3, "responsetime", 32.0))))
        );

@ -104,10 +107,10 @@ public class AggregationDataExtractorTests extends ESTestCase {
        assertThat(extractor.hasNext(), is(true));
        Optional<InputStream> stream = extractor.next();
        assertThat(stream.isPresent(), is(true));
-        String expectedStream = "{\"time\":1000,\"airline\":\"a\",\"responsetime\":11.0,\"doc_count\":1} "
-                + "{\"time\":1000,\"airline\":\"b\",\"responsetime\":12.0,\"doc_count\":2} "
-                + "{\"time\":3000,\"airline\":\"c\",\"responsetime\":31.0,\"doc_count\":4} "
-                + "{\"time\":3000,\"airline\":\"b\",\"responsetime\":32.0,\"doc_count\":3}";
+        String expectedStream = "{\"time\":1999,\"airline\":\"a\",\"responsetime\":11.0,\"doc_count\":1} "
+                + "{\"time\":1999,\"airline\":\"b\",\"responsetime\":12.0,\"doc_count\":2} "
+                + "{\"time\":3999,\"airline\":\"c\",\"responsetime\":31.0,\"doc_count\":4} "
+                + "{\"time\":3999,\"airline\":\"b\",\"responsetime\":32.0,\"doc_count\":3}";
        assertThat(asString(stream.get()), equalTo(expectedStream));
        assertThat(extractor.hasNext(), is(false));
        assertThat(capturedSearchRequests.size(), equalTo(1));
@ -128,7 +131,7 @@ public class AggregationDataExtractorTests extends ESTestCase {
        List<Histogram.Bucket> histogramBuckets = new ArrayList<>(buckets);
        long timestamp = 1000;
        for (int i = 0; i < buckets; i++) {
-            histogramBuckets.add(createHistogramBucket(timestamp, 3));
+            histogramBuckets.add(createHistogramBucket(timestamp, 3, Arrays.asList(createMax("time", timestamp))));
            timestamp += 1000L;
        }

@ -222,7 +225,7 @@ public class AggregationDataExtractorTests extends ESTestCase {
        List<Histogram.Bucket> histogramBuckets = new ArrayList<>(buckets);
        long timestamp = 1000;
        for (int i = 0; i < buckets; i++) {
-            histogramBuckets.add(createHistogramBucket(timestamp, 3));
+            histogramBuckets.add(createHistogramBucket(timestamp, 3, Arrays.asList(createMax("time", timestamp))));
            timestamp += 1000L;
        }

--- a/plugin/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/aggregation/AggregationTestUtils.java
+++ b/plugin/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/aggregation/AggregationTestUtils.java
@ -11,6 +11,7 @@ import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
 import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
 import org.elasticsearch.search.aggregations.bucket.terms.Terms;
 import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation;
+import org.elasticsearch.search.aggregations.metrics.max.Max;
 import org.elasticsearch.search.aggregations.metrics.percentiles.Percentile;
 import org.elasticsearch.search.aggregations.metrics.percentiles.Percentiles;
 import org.joda.time.DateTime;
@ -39,6 +40,9 @@ public final class AggregationTestUtils {
    static Aggregations createAggs(List<Aggregation> aggsList) {
        Aggregations aggs = mock(Aggregations.class);
        when(aggs.asList()).thenReturn(aggsList);
+        for (Aggregation agg: aggsList) {
+            when(aggs.get(agg.getName())).thenReturn(agg);
+        }
        return aggs;
    }

@ -56,6 +60,14 @@ public final class AggregationTestUtils {
        return bucket;
    }

+    static Max createMax(String name, double value) {
+        Max max = mock(Max.class);
+        when(max.getName()).thenReturn(name);
+        when(max.value()).thenReturn(value);
+        when(max.getValue()).thenReturn(value);
+        return max;
+    }
+
    static NumericMetricsAggregation.SingleValue createSingleValue(String name, double value) {
        NumericMetricsAggregation.SingleValue singleValue = mock(NumericMetricsAggregation.SingleValue.class);
        when(singleValue.getName()).thenReturn(name);
--- a/plugin/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/aggregation/AggregationToJsonProcessorTests.java
+++ b/plugin/src/test/java/org/elasticsearch/xpack/ml/datafeed/extractor/aggregation/AggregationToJsonProcessorTests.java
@ -9,7 +9,6 @@ import org.elasticsearch.search.aggregations.Aggregations;
 import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
 import org.elasticsearch.search.aggregations.bucket.terms.Terms;
 import org.elasticsearch.test.ESTestCase;
-import org.joda.time.DateTime;

 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
@ -22,8 +21,8 @@ import java.util.Map;

 import static org.elasticsearch.xpack.ml.datafeed.extractor.aggregation.AggregationTestUtils.Term;
 import static org.elasticsearch.xpack.ml.datafeed.extractor.aggregation.AggregationTestUtils.createAggs;
-import static org.elasticsearch.xpack.ml.datafeed.extractor.aggregation.AggregationTestUtils.createDateHistogramBucket;
 import static org.elasticsearch.xpack.ml.datafeed.extractor.aggregation.AggregationTestUtils.createHistogramBucket;
+import static org.elasticsearch.xpack.ml.datafeed.extractor.aggregation.AggregationTestUtils.createMax;
 import static org.elasticsearch.xpack.ml.datafeed.extractor.aggregation.AggregationTestUtils.createPercentiles;
 import static org.elasticsearch.xpack.ml.datafeed.extractor.aggregation.AggregationTestUtils.createSingleValue;
 import static org.elasticsearch.xpack.ml.datafeed.extractor.aggregation.AggregationTestUtils.createTerms;
@ -36,22 +35,42 @@ public class AggregationToJsonProcessorTests extends ESTestCase {

    private long keyValuePairsWritten = 0;

-    public void testProcessGivenHistogramOnly() throws IOException {
+    public void testProcessGivenMaxTimeIsMissing() throws IOException {
        List<Histogram.Bucket> histogramBuckets = Arrays.asList(
                createHistogramBucket(1000L, 3),
                createHistogramBucket(2000L, 5)
        );

+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> aggToString("time", histogramBuckets));
+        assertThat(e.getMessage(), containsString("Missing max aggregation for time_field [time]"));
+    }
+
+    public void testProcessGivenNonMaxTimeAgg() throws IOException {
+        List<Histogram.Bucket> histogramBuckets = Arrays.asList(
+                createHistogramBucket(1000L, 3, Arrays.asList(createTerms("time"))),
+                createHistogramBucket(2000L, 5, Arrays.asList(createTerms("time")))
+        );
+
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> aggToString("time", histogramBuckets));
+        assertThat(e.getMessage(), containsString("Missing max aggregation for time_field [time]"));
+    }
+
+    public void testProcessGivenHistogramOnly() throws IOException {
+        List<Histogram.Bucket> histogramBuckets = Arrays.asList(
+                createHistogramBucket(1000L, 3, Arrays.asList(createMax("timestamp", 1200))),
+                createHistogramBucket(2000L, 5, Arrays.asList(createMax("timestamp", 2800)))
+        );
+
        String json = aggToString("timestamp", histogramBuckets);

-        assertThat(json, equalTo("{\"timestamp\":1000,\"doc_count\":3} {\"timestamp\":2000,\"doc_count\":5}"));
+        assertThat(json, equalTo("{\"timestamp\":1200,\"doc_count\":3} {\"timestamp\":2800,\"doc_count\":5}"));
        assertThat(keyValuePairsWritten, equalTo(4L));
    }

    public void testProcessGivenHistogramOnlyAndNoDocCount() throws IOException {
        List<Histogram.Bucket> histogramBuckets = Arrays.asList(
-                createHistogramBucket(1000L, 3),
-                createHistogramBucket(2000L, 5)
+                createHistogramBucket(1000L, 3, Arrays.asList(createMax("time", 1000))),
+                createHistogramBucket(2000L, 5, Arrays.asList(createMax("time", 2000)))
        );

        String json = aggToString("time", false, histogramBuckets);
@ -62,8 +81,10 @@ public class AggregationToJsonProcessorTests extends ESTestCase {

    public void testProcessGivenSingleMetricPerHistogram() throws IOException {
        List<Histogram.Bucket> histogramBuckets = Arrays.asList(
-                createHistogramBucket(1000L, 3, Arrays.asList(createSingleValue("my_value", 1.0))),
-                createHistogramBucket(2000L, 5, Arrays.asList(createSingleValue("my_value", 2.0)))
+                createHistogramBucket(1000L, 3, Arrays.asList(
+                        createMax("time", 1000), createSingleValue("my_value", 1.0))),
+                createHistogramBucket(2000L, 5, Arrays.asList(
+                        createMax("time", 2000), createSingleValue("my_value", 2.0)))
        );

        String json = aggToString("time", histogramBuckets);
@ -74,32 +95,41 @@ public class AggregationToJsonProcessorTests extends ESTestCase {
    public void testProcessGivenTermsPerHistogram() throws IOException {
        List<Histogram.Bucket> histogramBuckets = Arrays.asList(
                createHistogramBucket(1000L, 4, Arrays.asList(
+                        createMax("time", 1100),
                        createTerms("my_field", new Term("a", 1), new Term("b", 2), new Term("c", 1)))),
-                createHistogramBucket(2000L, 5, Arrays.asList(createTerms("my_field", new Term("a", 5), new Term("b", 2)))),
-                createHistogramBucket(3000L, 0, Arrays.asList()),
-                createHistogramBucket(4000L, 7, Arrays.asList(createTerms("my_field", new Term("c", 4), new Term("b", 3))))
+                createHistogramBucket(2000L, 5, Arrays.asList(
+                        createMax("time", 2200),
+                        createTerms("my_field", new Term("a", 5), new Term("b", 2)))),
+                createHistogramBucket(3000L, 0, Arrays.asList(createMax("time", -1))),
+                createHistogramBucket(4000L, 7, Arrays.asList(
+                        createMax("time", 4400),
+                        createTerms("my_field", new Term("c", 4), new Term("b", 3))))
        );

        String json = aggToString("time", histogramBuckets);

-        assertThat(json, equalTo("{\"time\":1000,\"my_field\":\"a\",\"doc_count\":1} " +
-                "{\"time\":1000,\"my_field\":\"b\",\"doc_count\":2} " +
-                "{\"time\":1000,\"my_field\":\"c\",\"doc_count\":1} " +
-                "{\"time\":2000,\"my_field\":\"a\",\"doc_count\":5} " +
-                "{\"time\":2000,\"my_field\":\"b\",\"doc_count\":2} " +
-                "{\"time\":4000,\"my_field\":\"c\",\"doc_count\":4} " +
-                "{\"time\":4000,\"my_field\":\"b\",\"doc_count\":3}"));
+        assertThat(json, equalTo("{\"time\":1100,\"my_field\":\"a\",\"doc_count\":1} " +
+                "{\"time\":1100,\"my_field\":\"b\",\"doc_count\":2} " +
+                "{\"time\":1100,\"my_field\":\"c\",\"doc_count\":1} " +
+                "{\"time\":2200,\"my_field\":\"a\",\"doc_count\":5} " +
+                "{\"time\":2200,\"my_field\":\"b\",\"doc_count\":2} " +
+                "{\"time\":4400,\"my_field\":\"c\",\"doc_count\":4} " +
+                "{\"time\":4400,\"my_field\":\"b\",\"doc_count\":3}"));
    }

    public void testProcessGivenSingleMetricPerSingleTermsPerHistogram() throws IOException {
        List<Histogram.Bucket> histogramBuckets = Arrays.asList(
-                createHistogramBucket(1000L, 4, Arrays.asList(createTerms("my_field",
-                        new Term("a", 1, "my_value", 11.0), new Term("b", 2, "my_value", 12.0), new Term("c", 1, "my_value", 13.0)))),
-                createHistogramBucket(2000L, 5, Arrays.asList(createTerms("my_field",
-                        new Term("a", 5, "my_value", 21.0), new Term("b", 2, "my_value", 22.0)))),
-                createHistogramBucket(3000L, 0, Arrays.asList()),
-                createHistogramBucket(4000L, 7, Arrays.asList(createTerms("my_field",
-                        new Term("c", 4, "my_value", 41.0), new Term("b", 3, "my_value", 42.0))))
+                createHistogramBucket(1000L, 4, Arrays.asList(
+                        createMax("time", 1000),
+                        createTerms("my_field", new Term("a", 1, "my_value", 11.0),
+                                new Term("b", 2, "my_value", 12.0), new Term("c", 1, "my_value", 13.0)))),
+                createHistogramBucket(2000L, 5, Arrays.asList(
+                        createMax("time", 2000),
+                        createTerms("my_field", new Term("a", 5, "my_value", 21.0), new Term("b", 2, "my_value", 22.0)))),
+                createHistogramBucket(3000L, 0, Arrays.asList(createMax("time", 3000))),
+                createHistogramBucket(4000L, 7, Arrays.asList(
+                        createMax("time", 4000),
+                        createTerms("my_field", new Term("c", 4, "my_value", 41.0), new Term("b", 3, "my_value", 42.0))))
        );

        String json = aggToString("time", histogramBuckets);
@ -136,13 +166,17 @@ public class AggregationToJsonProcessorTests extends ESTestCase {
        b4NumericAggs.put("my_value", 421.0);
        b4NumericAggs.put("my_value2", 422.0);
        List<Histogram.Bucket> histogramBuckets = Arrays.asList(
-                createHistogramBucket(1000L, 4, Arrays.asList(createTerms("my_field",
-                        new Term("a", 1, a1NumericAggs), new Term("b", 2, b1NumericAggs), new Term("c", 1, c1NumericAggs)))),
-                createHistogramBucket(2000L, 5, Arrays.asList(createTerms("my_field",
-                        new Term("a", 5, a2NumericAggs), new Term("b", 2, b2NumericAggs)))),
-                createHistogramBucket(3000L, 0, Arrays.asList()),
-                createHistogramBucket(4000L, 7, Arrays.asList(createTerms("my_field",
-                        new Term("c", 4, c4NumericAggs), new Term("b", 3, b4NumericAggs))))
+                createHistogramBucket(1000L, 4, Arrays.asList(
+                        createMax("time", 1000),
+                        createTerms("my_field", new Term("a", 1, a1NumericAggs),
+                                new Term("b", 2, b1NumericAggs), new Term("c", 1, c1NumericAggs)))),
+                createHistogramBucket(2000L, 5, Arrays.asList(
+                        createMax("time", 2000),
+                        createTerms("my_field", new Term("a", 5, a2NumericAggs), new Term("b", 2, b2NumericAggs)))),
+                createHistogramBucket(3000L, 0, Arrays.asList(createMax("time", 3000))),
+                createHistogramBucket(4000L, 7, Arrays.asList(
+                        createMax("time", 4000),
+                        createTerms("my_field", new Term("c", 4, c4NumericAggs), new Term("b", 3, b4NumericAggs))))
        );

        String json = aggToString("time", false, histogramBuckets);
@ -160,7 +194,7 @@ public class AggregationToJsonProcessorTests extends ESTestCase {
        Histogram.Bucket histogramBucket = createHistogramBucket(1000L, 2);
        Histogram anotherHistogram = mock(Histogram.class);
        when(anotherHistogram.getName()).thenReturn("nested-agg");
-        Aggregations subAggs = createAggs(Arrays.asList(anotherHistogram));
+        Aggregations subAggs = createAggs(Arrays.asList(createMax("time", 1000), anotherHistogram));
        when(histogramBucket.getAggregations()).thenReturn(subAggs);

        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> aggToString("time", histogramBucket));
@ -171,30 +205,23 @@ public class AggregationToJsonProcessorTests extends ESTestCase {
        Histogram.Bucket histogramBucket = createHistogramBucket(1000L, 2);
        Terms terms1 = mock(Terms.class);
        Terms terms2 = mock(Terms.class);
-        Aggregations subAggs = createAggs(Arrays.asList(terms1, terms2));
+        Aggregations subAggs = createAggs(Arrays.asList(createMax("time", 1000), terms1, terms2));
        when(histogramBucket.getAggregations()).thenReturn(subAggs);

        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> aggToString("time", histogramBucket));
        assertThat(e.getMessage(), containsString("Multiple non-leaf nested aggregations are not supported"));
    }

-    public void testProcessGivenHistogramWithDateTimeKeys() throws IOException {
-        List<Histogram.Bucket> histogramBuckets = Arrays.asList(
-                createDateHistogramBucket(new DateTime(1000L), 3),
-                createDateHistogramBucket(new DateTime(2000L), 5)
-        );
-
-        String json = aggToString("time", histogramBuckets);
-
-        assertThat(json, equalTo("{\"time\":1000,\"doc_count\":3} {\"time\":2000,\"doc_count\":5}"));
-    }
-
    public void testProcessGivenSinglePercentilesPerHistogram() throws IOException {
        List<Histogram.Bucket> histogramBuckets = Arrays.asList(
-                createHistogramBucket(1000L, 4, Arrays.asList(createPercentiles("my_field", 1.0))),
-                createHistogramBucket(2000L, 7, Arrays.asList(createPercentiles("my_field", 2.0))),
-                createHistogramBucket(3000L, 10, Arrays.asList(createPercentiles("my_field", 3.0))),
-                createHistogramBucket(4000L, 14, Arrays.asList(createPercentiles("my_field", 4.0)))
+                createHistogramBucket(1000L, 4, Arrays.asList(
+                        createMax("time", 1000), createPercentiles("my_field", 1.0))),
+                createHistogramBucket(2000L, 7, Arrays.asList(
+                        createMax("time", 2000), createPercentiles("my_field", 2.0))),
+                createHistogramBucket(3000L, 10, Arrays.asList(
+                        createMax("time", 3000), createPercentiles("my_field", 3.0))),
+                createHistogramBucket(4000L, 14, Arrays.asList(
+                        createMax("time", 4000), createPercentiles("my_field", 4.0)))
        );

        String json = aggToString("time", histogramBuckets);
@ -207,10 +234,14 @@ public class AggregationToJsonProcessorTests extends ESTestCase {

    public void testProcessGivenMultiplePercentilesPerHistogram() throws IOException {
        List<Histogram.Bucket> histogramBuckets = Arrays.asList(
-                createHistogramBucket(1000L, 4, Arrays.asList(createPercentiles("my_field", 1.0))),
-                createHistogramBucket(2000L, 7, Arrays.asList(createPercentiles("my_field", 2.0, 5.0))),
-                createHistogramBucket(3000L, 10, Arrays.asList(createPercentiles("my_field", 3.0))),
-                createHistogramBucket(4000L, 14, Arrays.asList(createPercentiles("my_field", 4.0)))
+                createHistogramBucket(1000L, 4, Arrays.asList(
+                        createMax("time", 1000), createPercentiles("my_field", 1.0))),
+                createHistogramBucket(2000L, 7, Arrays.asList(
+                        createMax("time", 2000), createPercentiles("my_field", 2.0, 5.0))),
+                createHistogramBucket(3000L, 10, Arrays.asList(
+                        createMax("time", 3000), createPercentiles("my_field", 3.0))),
+                createHistogramBucket(4000L, 14, Arrays.asList(
+                        createMax("time", 4000), createPercentiles("my_field", 4.0)))
        );

        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> aggToString("time", histogramBuckets));
@ -227,9 +258,9 @@ public class AggregationToJsonProcessorTests extends ESTestCase {

    private String aggToString(String timeField, boolean includeDocCount, List<Histogram.Bucket> buckets) throws IOException {
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-        try (AggregationToJsonProcessor processor = new AggregationToJsonProcessor(includeDocCount, outputStream)) {
+        try (AggregationToJsonProcessor processor = new AggregationToJsonProcessor(timeField, includeDocCount, outputStream)) {
            for (Histogram.Bucket bucket : buckets) {
-                processor.process(timeField, bucket);
+                processor.process(bucket);
            }
            keyValuePairsWritten = processor.getKeyValueCount();
        }
--- a/plugin/src/test/java/org/elasticsearch/xpack/ml/integration/DatafeedJobIT.java
+++ b/plugin/src/test/java/org/elasticsearch/xpack/ml/integration/DatafeedJobIT.java
@ -305,8 +305,10 @@ public class DatafeedJobIT extends ESRestTestCase {
                new StringEntity(job, ContentType.APPLICATION_JSON));

        String datafeedId = "datafeed-" + jobId;
-        String aggregations = "{\"time stamp\":{\"histogram\":{\"field\":\"time stamp\",\"interval\":3600000},"
-                + "\"aggregations\":{\"airline\":{\"terms\":{\"field\":\"airline\",\"size\":10},"
+        String aggregations = "{\"buckets\":{\"histogram\":{\"field\":\"time stamp\",\"interval\":3600000},"
+                + "\"aggregations\":{"
+                + "\"time stamp\":{\"max\":{\"field\":\"time stamp\"}},"
+                + "\"airline\":{\"terms\":{\"field\":\"airline\",\"size\":10},"
                + "  \"aggregations\":{\"responsetime\":{\"avg\":{\"field\":\"responsetime\"}}}}}}}";
        new DatafeedBuilder(datafeedId, jobId, "airline-data-aggs", "response").setAggregations(aggregations).build();
        openJob(client(), jobId);
@ -332,7 +334,9 @@ public class DatafeedJobIT extends ESRestTestCase {

        String datafeedId = "datafeed-" + jobId;
        String aggregations = "{\"time stamp\":{\"date_histogram\":{\"field\":\"time stamp\",\"interval\":\"1h\"},"
-                + "\"aggregations\":{\"airline\":{\"terms\":{\"field\":\"airline\",\"size\":10},"
+                + "\"aggregations\":{"
+                + "\"time stamp\":{\"max\":{\"field\":\"time stamp\"}},"
+                + "\"airline\":{\"terms\":{\"field\":\"airline\",\"size\":10},"
                + "  \"aggregations\":{\"responsetime\":{\"avg\":{\"field\":\"responsetime\"}}}}}}}";
        new DatafeedBuilder(datafeedId, jobId, "airline-data-aggs", "response").setAggregations(aggregations).build();
        openJob(client(), jobId);
--- a/plugin/src/test/resources/rest-api-spec/test/ml/preview_datafeed.yaml
+++ b/plugin/src/test/resources/rest-api-spec/test/ml/preview_datafeed.yaml
@ -142,12 +142,17 @@ setup:
            "indexes":"airline-data",
            "types":"response",
            "aggregations": {
-              "time": {
+              "buckets": {
                "histogram": {
                  "field": "time",
                  "interval": 3600000
                },
                "aggregations": {
+                  "time": {
+                    "max": {
+                      "field": "time"
+                    }
+                  },
                  "airline": {
                    "terms": {
                      "field": "airline",
@ -170,15 +175,15 @@ setup:
      xpack.ml.preview_datafeed:
        datafeed_id: aggregation-doc-count-feed
  - length: { $body: 3 }
-  - match: { 0.time: 1.487376E12 }
+  - match: { 0.time: 1487377800000 }
  - match: { 0.airline: foo }
  - match: { 0.responsetime: 2.0 }
  - match: { 0.doc_count: 2 }
-  - match: { 1.time: 1.4873796E12 }
+  - match: { 1.time: 1487379660000 }
  - match: { 1.airline: bar }
  - match: { 1.responsetime: 42.0 }
  - match: { 1.doc_count: 1 }
-  - match: { 1.time: 1.4873796E12 }
+  - match: { 1.time: 1487379660000 }
  - match: { 2.airline: foo }
  - match: { 2.responsetime: 42.0 }
  - match: { 2.doc_count: 1 }
@ -210,12 +215,17 @@ setup:
            "indexes":"airline-data",
            "types":"response",
            "aggregations": {
-              "time": {
+              "buckets": {
                "histogram": {
                  "field": "time",
                  "interval": 3600000
                },
                "aggregations": {
+                  "time": {
+                    "max": {
+                      "field": "time"
+                    }
+                  },
                  "dc_airline": {
                    "cardinality": {
                      "field": "airline"
@ -230,10 +240,10 @@ setup:
      xpack.ml.preview_datafeed:
        datafeed_id: aggregation-custom-single-metric-summary-feed
  - length: { $body: 2 }
-  - match: { 0.time: 1.487376E12 }
+  - match: { 0.time: 1487377800000 }
  - match: { 0.dc_airline: 1 }
  - is_false: 0.doc_count
-  - match: { 1.time: 1.4873796E12 }
+  - match: { 1.time: 1487379660000 }
  - match: { 1.dc_airline: 2 }
  - is_false: 1.doc_count

@ -264,12 +274,17 @@ setup:
            "indexes":"airline-data",
            "types":"response",
            "aggregations": {
-              "time": {
+              "buckets": {
                "histogram": {
                  "field": "time",
                  "interval": 3600000
                },
                "aggregations": {
+                  "time": {
+                    "max": {
+                      "field": "time"
+                    }
+                  },
                  "airline": {
                    "terms": {
                        "field": "airline"
@ -296,17 +311,17 @@ setup:
      xpack.ml.preview_datafeed:
        datafeed_id: aggregation-custom-multi-metric-summary-feed
  - length: { $body: 3 }
-  - match: { 0.time: 1.487376E12 }
+  - match: { 0.time: 1487377800000 }
  - match: { 0.airline: foo }
  - match: { 0.responsetime: 2.0 }
  - match: { 0.event_rate: 11 }
  - is_false: 0.doc_count
-  - match: { 1.time: 1.4873796E12 }
+  - match: { 1.time: 1487379660000 }
  - match: { 1.airline: bar }
  - match: { 1.responsetime: 42.0 }
  - match: { 1.event_rate: 8 }
  - is_false: 1.doc_count
-  - match: { 1.time: 1.4873796E12 }
+  - match: { 1.time: 1487379660000 }
  - match: { 2.airline: foo }
  - match: { 2.responsetime: 42.0 }
  - match: { 2.event_rate: 7 }