Replace http data extractor with a client extractor (elastic/elasticsearch#619)
* Replace http data extractor with a client extractor

  This first implementation replaces the HTTP extractor with a client extractor that uses search & scroll.
  Note that this first implementation has some limitations:

  - Only reads data that are in the _source
  - Does not handle aggregated searches

  These limitations will be addressed in follow up PRs.

  Relates to elastic/elasticsearch#154

Original commit: elastic/x-pack-elasticsearch@f692ed961c
This commit is contained in:
parent dee7412044
commit 9e5245fd64
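In short, the scheduler no longer talks to Elasticsearch over raw HTTP: it asks a DataExtractorFactory for a fresh DataExtractor per [start, end) interval and drains it page by page. The following is a minimal sketch of that loop using only the interfaces introduced in this commit; the `postData` helper is a hypothetical stand-in for the PostDataAction request that ScheduledJob actually sends, and error handling is omitted.

```java
import java.io.InputStream;
import java.util.Optional;

import org.elasticsearch.xpack.prelert.scheduler.extractor.DataExtractor;
import org.elasticsearch.xpack.prelert.scheduler.extractor.DataExtractorFactory;

class ExtractionLoopSketch {
    // Sketch of how ScheduledJob.run(start, end) consumes a scroll-based extractor.
    static void extract(DataExtractorFactory factory, long start, long end) throws Exception {
        DataExtractor extractor = factory.newExtractor(start, end); // one extractor per interval
        while (extractor.hasNext()) {
            Optional<InputStream> page = extractor.next();          // one scroll page, rendered as JSON
            if (page.isPresent()) {
                postData(page.get());                               // placeholder for the PostDataAction call
            }
        }
    }

    static void postData(InputStream data) { /* feed the autodetect process */ }
}
```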
@@ -47,16 +47,16 @@ import org.elasticsearch.xpack.prelert.action.GetModelSnapshotsAction;
 import org.elasticsearch.xpack.prelert.action.GetRecordsAction;
 import org.elasticsearch.xpack.prelert.action.GetSchedulersAction;
 import org.elasticsearch.xpack.prelert.action.GetSchedulersStatsAction;
-import org.elasticsearch.xpack.prelert.action.PostDataAction;
 import org.elasticsearch.xpack.prelert.action.OpenJobAction;
+import org.elasticsearch.xpack.prelert.action.PostDataAction;
 import org.elasticsearch.xpack.prelert.action.PutJobAction;
 import org.elasticsearch.xpack.prelert.action.PutListAction;
-import org.elasticsearch.xpack.prelert.action.UpdateModelSnapshotAction;
 import org.elasticsearch.xpack.prelert.action.PutSchedulerAction;
 import org.elasticsearch.xpack.prelert.action.RevertModelSnapshotAction;
 import org.elasticsearch.xpack.prelert.action.StartSchedulerAction;
 import org.elasticsearch.xpack.prelert.action.StopSchedulerAction;
 import org.elasticsearch.xpack.prelert.action.UpdateJobStatusAction;
+import org.elasticsearch.xpack.prelert.action.UpdateModelSnapshotAction;
 import org.elasticsearch.xpack.prelert.action.UpdateSchedulerStatusAction;
 import org.elasticsearch.xpack.prelert.action.ValidateDetectorAction;
 import org.elasticsearch.xpack.prelert.action.ValidateTransformAction;
@@ -90,16 +90,16 @@ import org.elasticsearch.xpack.prelert.rest.job.RestDeleteJobAction;
 import org.elasticsearch.xpack.prelert.rest.job.RestFlushJobAction;
 import org.elasticsearch.xpack.prelert.rest.job.RestGetJobsAction;
 import org.elasticsearch.xpack.prelert.rest.job.RestGetJobsStatsAction;
-import org.elasticsearch.xpack.prelert.rest.job.RestPostDataAction;
 import org.elasticsearch.xpack.prelert.rest.job.RestOpenJobAction;
+import org.elasticsearch.xpack.prelert.rest.job.RestPostDataAction;
 import org.elasticsearch.xpack.prelert.rest.job.RestPutJobAction;
 import org.elasticsearch.xpack.prelert.rest.list.RestDeleteListAction;
 import org.elasticsearch.xpack.prelert.rest.list.RestGetListAction;
 import org.elasticsearch.xpack.prelert.rest.list.RestPutListAction;
 import org.elasticsearch.xpack.prelert.rest.modelsnapshots.RestDeleteModelSnapshotAction;
 import org.elasticsearch.xpack.prelert.rest.modelsnapshots.RestGetModelSnapshotsAction;
-import org.elasticsearch.xpack.prelert.rest.modelsnapshots.RestUpdateModelSnapshotAction;
 import org.elasticsearch.xpack.prelert.rest.modelsnapshots.RestRevertModelSnapshotAction;
+import org.elasticsearch.xpack.prelert.rest.modelsnapshots.RestUpdateModelSnapshotAction;
 import org.elasticsearch.xpack.prelert.rest.results.RestGetBucketsAction;
 import org.elasticsearch.xpack.prelert.rest.results.RestGetCategoriesAction;
 import org.elasticsearch.xpack.prelert.rest.results.RestGetInfluencersAction;
@@ -114,7 +114,6 @@ import org.elasticsearch.xpack.prelert.rest.validate.RestValidateDetectorAction;
 import org.elasticsearch.xpack.prelert.rest.validate.RestValidateTransformAction;
 import org.elasticsearch.xpack.prelert.rest.validate.RestValidateTransformsAction;
 import org.elasticsearch.xpack.prelert.scheduler.ScheduledJobRunner;
-import org.elasticsearch.xpack.prelert.scheduler.http.HttpDataExtractorFactory;
 import org.elasticsearch.xpack.prelert.utils.NamedPipeHelper;
 
 import java.io.IOException;
@@ -209,8 +208,6 @@ public class PrelertPlugin extends Plugin implements ActionPlugin {
                 jobResultsPersister, jobRenormalizedResultsPersister, jobDataCountsPersister, autodetectResultsParser,
                 autodetectProcessFactory, normalizerFactory);
         ScheduledJobRunner scheduledJobRunner = new ScheduledJobRunner(threadPool, client, clusterService, jobProvider,
-                // norelease: we will no longer need to pass the client here after we switch to a client based data extractor
-                new HttpDataExtractorFactory(client, searchRequestParsers),
                 System::currentTimeMillis);
         TaskManager taskManager = new TaskManager(settings);
 
@@ -43,6 +43,7 @@ public class DataDescription extends ToXContentToBytes implements Writeable {
         JSON("json"),
         DELIMITED("delimited"),
         SINGLE_LINE("single_line"),
+        // TODO norelease, this can now be removed
         ELASTICSEARCH("elasticsearch");
 
         /**
@@ -22,7 +22,6 @@ abstract class AbstractJsonRecordReader implements JsonRecordReader {
     // NORELEASE - Remove direct dependency on Jackson
     protected final JsonParser parser;
     protected final Map<String, Integer> fieldMap;
-    protected final String recordHoldingField;
    protected final Logger logger;
    protected int nestedLevel;
    protected long fieldCount;
@@ -35,36 +34,15 @@ abstract class AbstractJsonRecordReader implements JsonRecordReader {
      *            The JSON parser
      * @param fieldMap
      *            Map to field name to record array index position
-     * @param recordHoldingField
-     *            record holding field
      * @param logger
      *            the logger
      */
-    AbstractJsonRecordReader(JsonParser parser, Map<String, Integer> fieldMap, String recordHoldingField, Logger logger) {
+    AbstractJsonRecordReader(JsonParser parser, Map<String, Integer> fieldMap, Logger logger) {
        this.parser = Objects.requireNonNull(parser);
        this.fieldMap = Objects.requireNonNull(fieldMap);
-       this.recordHoldingField = Objects.requireNonNull(recordHoldingField);
        this.logger = Objects.requireNonNull(logger);
    }
 
-    protected void consumeToField(String field) throws IOException {
-        if (field == null || field.isEmpty()) {
-            return;
-        }
-        JsonToken token = null;
-        while ((token = tryNextTokenOrReadToEndOnError()) != null) {
-            if (token == JsonToken.FIELD_NAME
-                    && parser.getCurrentName().equals(field)) {
-                tryNextTokenOrReadToEndOnError();
-                return;
-            }
-        }
-    }
-
-    protected void consumeToRecordHoldingField() throws IOException {
-        consumeToField(recordHoldingField);
-    }
-
    protected void initArrays(String[] record, boolean[] gotFields) {
        Arrays.fill(gotFields, false);
        Arrays.fill(record, "");
@@ -11,7 +11,6 @@ import org.apache.logging.log4j.Logger;
 import org.elasticsearch.xpack.prelert.job.AnalysisConfig;
 import org.elasticsearch.xpack.prelert.job.DataCounts;
 import org.elasticsearch.xpack.prelert.job.DataDescription;
-import org.elasticsearch.xpack.prelert.job.DataDescription.DataFormat;
 import org.elasticsearch.xpack.prelert.job.process.autodetect.AutodetectProcess;
 import org.elasticsearch.xpack.prelert.job.status.StatusReporter;
 import org.elasticsearch.xpack.prelert.job.transform.TransformConfigs;
@@ -32,7 +31,6 @@ import java.util.function.Supplier;
  * detailed description.
  */
 class JsonDataToProcessWriter extends AbstractDataToProcessWriter {
-    private static final String ELASTICSEARCH_SOURCE_FIELD = "_source";
 
     public JsonDataToProcessWriter(boolean includeControlField, AutodetectProcess autodetectProcess, DataDescription dataDescription,
                                    AnalysisConfig analysisConfig, TransformConfigs transforms, StatusReporter statusReporter,
@@ -73,7 +71,7 @@ class JsonDataToProcessWriter extends AbstractDataToProcessWriter {
         // We never expect to get the control field
         boolean[] gotFields = new boolean[analysisFields.size()];
 
-        JsonRecordReader recordReader = new SimpleJsonRecordReader(parser, inFieldIndexes, getRecordHoldingField(), logger);
+        JsonRecordReader recordReader = new SimpleJsonRecordReader(parser, inFieldIndexes, logger);
         long inputFieldCount = recordReader.read(input, gotFields);
         while (inputFieldCount >= 0) {
             Arrays.fill(record, "");
@@ -96,13 +94,6 @@ class JsonDataToProcessWriter extends AbstractDataToProcessWriter {
             }
         }
     }
 
-    private String getRecordHoldingField() {
-        if (dataDescription.getFormat().equals(DataFormat.ELASTICSEARCH)) {
-            return ELASTICSEARCH_SOURCE_FIELD;
-        }
-        return "";
-    }
-
     /**
      * Don't enforce the check that all the fields are present in JSON docs.
      * Always returns true
@@ -26,13 +26,11 @@ class SimpleJsonRecordReader extends AbstractJsonRecordReader {
      *            The JSON parser
      * @param fieldMap
      *            Map to field name to record array index position
-     * @param recordHoldingField
-     *            record field
      * @param logger
      *            logger
      */
-    SimpleJsonRecordReader(JsonParser parser, Map<String, Integer> fieldMap, String recordHoldingField, Logger logger) {
-        super(parser, fieldMap, recordHoldingField, logger);
+    SimpleJsonRecordReader(JsonParser parser, Map<String, Integer> fieldMap, Logger logger) {
+        super(parser, fieldMap, logger);
     }
 
     /**
@@ -57,7 +55,6 @@ class SimpleJsonRecordReader extends AbstractJsonRecordReader {
         initArrays(record, gotFields);
         fieldCount = 0;
         clearNestedLevel();
-        consumeToRecordHoldingField();
 
         JsonToken token = tryNextTokenOrReadToEndOnError();
         while (!(token == JsonToken.END_OBJECT && nestedLevel == 0)) {
@@ -15,8 +15,9 @@ import org.elasticsearch.xpack.prelert.action.FlushJobAction;
 import org.elasticsearch.xpack.prelert.action.PostDataAction;
 import org.elasticsearch.xpack.prelert.job.DataCounts;
 import org.elasticsearch.xpack.prelert.job.audit.Auditor;
-import org.elasticsearch.xpack.prelert.job.extraction.DataExtractor;
 import org.elasticsearch.xpack.prelert.job.messages.Messages;
+import org.elasticsearch.xpack.prelert.scheduler.extractor.DataExtractor;
+import org.elasticsearch.xpack.prelert.scheduler.extractor.DataExtractorFactory;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
@@ -26,29 +27,29 @@ import java.util.function.Supplier;
 
 class ScheduledJob {
 
+    private static final Logger LOGGER = Loggers.getLogger(ScheduledJob.class);
     private static final int NEXT_TASK_DELAY_MS = 100;
 
-    private final Logger logger;
     private final Auditor auditor;
     private final String jobId;
     private final long frequencyMs;
     private final long queryDelayMs;
     private final Client client;
-    private final DataExtractor dataExtractor;
+    private final DataExtractorFactory dataExtractorFactory;
     private final Supplier<Long> currentTimeSupplier;
 
+    private volatile DataExtractor dataExtractor;
     private volatile long lookbackStartTimeMs;
     private volatile Long lastEndTimeMs;
     private volatile boolean running = true;
 
-    ScheduledJob(String jobId, long frequencyMs, long queryDelayMs, DataExtractor dataExtractor,
+    ScheduledJob(String jobId, long frequencyMs, long queryDelayMs, DataExtractorFactory dataExtractorFactory,
                  Client client, Auditor auditor, Supplier<Long> currentTimeSupplier,
                  long latestFinalBucketEndTimeMs, long latestRecordTimeMs) {
-        this.logger = Loggers.getLogger(jobId);
         this.jobId = jobId;
         this.frequencyMs = frequencyMs;
         this.queryDelayMs = queryDelayMs;
-        this.dataExtractor = dataExtractor;
+        this.dataExtractorFactory = dataExtractorFactory;
         this.client = client;
         this.auditor = auditor;
         this.currentTimeSupplier = currentTimeSupplier;
@@ -82,7 +83,7 @@ class ScheduledJob {
         request.setCalcInterim(true);
         run(lookbackStartTimeMs, lookbackEnd, request);
         auditor.info(Messages.getMessage(Messages.JOB_AUDIT_SCHEDULER_LOOKBACK_COMPLETED));
-        logger.info("Lookback has finished");
+        LOGGER.info("[{}] Lookback has finished", jobId);
         if (isLookbackOnly) {
             return null;
         } else {
@@ -103,8 +104,10 @@ class ScheduledJob {
     }
 
     public void stop() {
+        if (dataExtractor != null) {
+            dataExtractor.cancel();
+        }
         running = false;
-        dataExtractor.cancel();
         auditor.info(Messages.getMessage(Messages.JOB_AUDIT_SCHEDULER_STOPPED));
     }
 
@@ -117,12 +120,12 @@ class ScheduledJob {
             return;
         }
 
-        logger.trace("Searching data in: [" + start + ", " + end + ")");
+        LOGGER.trace("[{}] Searching data in: [{}, {})", jobId, start, end);
 
         RuntimeException error = null;
         long recordCount = 0;
-        dataExtractor.newSearch(start, end, logger);
-        while (running && dataExtractor.hasNext()) {
+        dataExtractor = dataExtractorFactory.newExtractor(start, end);
+        while (dataExtractor.hasNext()) {
             Optional<InputStream> extractedData;
             try {
                 extractedData = dataExtractor.next();
@@ -152,6 +155,7 @@ class ScheduledJob {
                 }
             }
         }
+        dataExtractor = null;
 
         lastEndTimeMs = Math.max(lastEndTimeMs == null ? 0 : lastEndTimeMs, end - 1);
 
@@ -24,14 +24,14 @@ import org.elasticsearch.xpack.prelert.job.Job;
 import org.elasticsearch.xpack.prelert.job.JobStatus;
 import org.elasticsearch.xpack.prelert.job.audit.Auditor;
 import org.elasticsearch.xpack.prelert.job.config.DefaultFrequency;
-import org.elasticsearch.xpack.prelert.job.extraction.DataExtractor;
-import org.elasticsearch.xpack.prelert.job.extraction.DataExtractorFactory;
 import org.elasticsearch.xpack.prelert.job.metadata.Allocation;
 import org.elasticsearch.xpack.prelert.job.metadata.PrelertMetadata;
 import org.elasticsearch.xpack.prelert.job.persistence.BucketsQueryBuilder;
 import org.elasticsearch.xpack.prelert.job.persistence.JobProvider;
 import org.elasticsearch.xpack.prelert.job.persistence.QueryPage;
 import org.elasticsearch.xpack.prelert.job.results.Bucket;
+import org.elasticsearch.xpack.prelert.scheduler.extractor.DataExtractorFactory;
+import org.elasticsearch.xpack.prelert.scheduler.extractor.scroll.ScrollDataExtractorFactory;
 import org.elasticsearch.xpack.prelert.utils.ExceptionsHelper;
 
 import java.time.Duration;
@@ -47,18 +47,16 @@ public class ScheduledJobRunner extends AbstractComponent {
     private final Client client;
     private final ClusterService clusterService;
     private final JobProvider jobProvider;
-    private final DataExtractorFactory dataExtractorFactory;
     private final ThreadPool threadPool;
     private final Supplier<Long> currentTimeSupplier;
 
     public ScheduledJobRunner(ThreadPool threadPool, Client client, ClusterService clusterService, JobProvider jobProvider,
-                              DataExtractorFactory dataExtractorFactory, Supplier<Long> currentTimeSupplier) {
+                              Supplier<Long> currentTimeSupplier) {
         super(Settings.EMPTY);
-        this.threadPool = threadPool;
-        this.clusterService = Objects.requireNonNull(clusterService);
         this.client = Objects.requireNonNull(client);
+        this.clusterService = Objects.requireNonNull(clusterService);
         this.jobProvider = Objects.requireNonNull(jobProvider);
-        this.dataExtractorFactory = Objects.requireNonNull(dataExtractorFactory);
+        this.threadPool = threadPool;
         this.currentTimeSupplier = Objects.requireNonNull(currentTimeSupplier);
     }
 
@@ -192,14 +190,18 @@ public class ScheduledJobRunner extends AbstractComponent {
         Auditor auditor = jobProvider.audit(job.getId());
         Duration frequency = getFrequencyOrDefault(scheduler, job);
         Duration queryDelay = Duration.ofSeconds(scheduler.getConfig().getQueryDelay());
-        DataExtractor dataExtractor = dataExtractorFactory.newExtractor(scheduler.getConfig(), job);
+        DataExtractorFactory dataExtractorFactory = createDataExtractorFactory(scheduler.getConfig(), job);
         ScheduledJob scheduledJob = new ScheduledJob(job.getId(), frequency.toMillis(), queryDelay.toMillis(),
-                dataExtractor, client, auditor, currentTimeSupplier, finalBucketEndMs, latestRecordTimeMs);
+                dataExtractorFactory, client, auditor, currentTimeSupplier, finalBucketEndMs, latestRecordTimeMs);
         Holder holder = new Holder(scheduler, scheduledJob, new ProblemTracker(() -> auditor), handler);
         task.setHolder(holder);
         return holder;
     }
 
+    DataExtractorFactory createDataExtractorFactory(SchedulerConfig schedulerConfig, Job job) {
+        return new ScrollDataExtractorFactory(client, schedulerConfig, job);
+    }
+
     private void gatherInformation(String jobId, BiConsumer<QueryPage<Bucket>, DataCounts> handler, Consumer<Exception> errorHandler) {
         BucketsQueryBuilder.BucketsQuery latestBucketQuery = new BucketsQueryBuilder()
                 .sortField(Bucket.TIMESTAMP.getPreferredName())
@@ -3,28 +3,13 @@
  * or more contributor license agreements. Licensed under the Elastic License;
  * you may not use this file except in compliance with the Elastic License.
  */
-package org.elasticsearch.xpack.prelert.job.extraction;
+package org.elasticsearch.xpack.prelert.scheduler.extractor;
 
-import org.apache.logging.log4j.Logger;
-
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Optional;
 
 public interface DataExtractor {
-    /**
-     * Set-up the extractor for a new search
-     *
-     * @param start start time
-     * @param end end time
-     * @param logger logger
-     */
-    void newSearch(long start, long end, Logger logger) throws IOException;
-
-    /**
-     * Cleans up after a search.
-     */
-    void clear();
-
     /**
      * @return {@code true} if the search has not finished yet, or {@code false} otherwise
@@ -3,11 +3,8 @@
  * or more contributor license agreements. Licensed under the Elastic License;
  * you may not use this file except in compliance with the Elastic License.
  */
-package org.elasticsearch.xpack.prelert.job.extraction;
+package org.elasticsearch.xpack.prelert.scheduler.extractor;
 
-import org.elasticsearch.xpack.prelert.job.Job;
-import org.elasticsearch.xpack.prelert.scheduler.SchedulerConfig;
-
 public interface DataExtractorFactory {
-    DataExtractor newExtractor(SchedulerConfig schedulerConfig, Job job);
+    DataExtractor newExtractor(long start, long end);
 }
@@ -0,0 +1,53 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.prelert.scheduler.extractor;
+
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.SearchHitField;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+public final class SearchHitFieldExtractor {
+
+    private SearchHitFieldExtractor() {}
+
+    public static Object[] extractField(SearchHit hit, String field) {
+        SearchHitField keyValue = hit.field(field);
+        if (keyValue != null) {
+            List<Object> values = keyValue.values();
+            return values.toArray(new Object[values.size()]);
+        } else {
+            return extractFieldFromSource(hit.getSource(), field);
+        }
+    }
+
+    private static Object[] extractFieldFromSource(Map<String, Object> source, String field) {
+        if (source != null) {
+            Object values = source.get(field);
+            if (values != null) {
+                if (values instanceof Object[]) {
+                    return (Object[]) values;
+                } else {
+                    return new Object[]{values};
+                }
+            }
+        }
+        return new Object[0];
+    }
+
+    public static Long extractTimeField(SearchHit hit, String timeField) {
+        Object[] fields = extractField(hit, timeField);
+        if (fields.length != 1) {
+            throw new RuntimeException("Time field [" + timeField + "] expected a single value; actual was: " + Arrays.toString(fields));
+        }
+        if (fields[0] instanceof Long) {
+            return (Long) fields[0];
+        }
+        throw new RuntimeException("Time field [" + timeField + "] expected a long value; actual was: " + fields[0]);
+    }
+}
@@ -0,0 +1,49 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.prelert.scheduler.extractor;
+
+import org.elasticsearch.common.lease.Releasable;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.json.JsonXContent;
+import org.elasticsearch.search.SearchHit;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+public class SearchHitToJsonProcessor implements Releasable {
+
+    private final String[] fields;
+    private final XContentBuilder jsonBuilder;
+
+    public SearchHitToJsonProcessor(String[] fields, OutputStream outputStream) throws IOException {
+        this.fields = fields;
+        jsonBuilder = new XContentBuilder(JsonXContent.jsonXContent, outputStream);
+    }
+
+    public void process(SearchHit hit) throws IOException {
+        jsonBuilder.startObject();
+        for (String field : fields) {
+            writeKeyValue(field, SearchHitFieldExtractor.extractField(hit, field));
+        }
+        jsonBuilder.endObject();
+    }
+
+    private void writeKeyValue(String key, Object... values) throws IOException {
+        if (values.length == 0) {
+            return;
+        }
+        if (values.length == 1) {
+            jsonBuilder.field(key, values[0]);
+        } else {
+            jsonBuilder.array(key, values);
+        }
+    }
+
+    @Override
+    public void close() {
+        jsonBuilder.close();
+    }
+}
@@ -0,0 +1,171 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.prelert.scheduler.extractor.scroll;
+
+import org.apache.logging.log4j.Logger;
+import org.elasticsearch.action.search.ClearScrollAction;
+import org.elasticsearch.action.search.SearchAction;
+import org.elasticsearch.action.search.SearchRequestBuilder;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.action.search.SearchScrollAction;
+import org.elasticsearch.action.search.SearchType;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.common.logging.Loggers;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.index.query.BoolQueryBuilder;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.query.RangeQueryBuilder;
+import org.elasticsearch.rest.RestStatus;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.aggregations.AggregationBuilder;
+import org.elasticsearch.search.aggregations.PipelineAggregationBuilder;
+import org.elasticsearch.search.builder.SearchSourceBuilder;
+import org.elasticsearch.search.sort.SortOrder;
+import org.elasticsearch.xpack.prelert.scheduler.extractor.DataExtractor;
+import org.elasticsearch.xpack.prelert.scheduler.extractor.SearchHitFieldExtractor;
+import org.elasticsearch.xpack.prelert.scheduler.extractor.SearchHitToJsonProcessor;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.NoSuchElementException;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+
+class ScrollDataExtractor implements DataExtractor {
+
+    private static final Logger LOGGER = Loggers.getLogger(ScrollDataExtractor.class);
+    private static final TimeValue SCROLL_TIMEOUT = new TimeValue(10, TimeUnit.MINUTES);
+
+    private final Client client;
+    private final ScrollDataExtractorContext context;
+    private volatile String scrollId;
+    private volatile boolean isCancelled;
+    private volatile boolean hasNext;
+    private volatile Long timestampOnCancel;
+
+    public ScrollDataExtractor(Client client, ScrollDataExtractorContext dataExtractorContext) {
+        this.client = Objects.requireNonNull(client);
+        this.context = Objects.requireNonNull(dataExtractorContext);
+        this.hasNext = true;
+    }
+
+    @Override
+    public boolean hasNext() {
+        return hasNext;
+    }
+
+    @Override
+    public void cancel() {
+        LOGGER.trace("[{}] Data extractor received cancel request", context.jobId);
+        isCancelled = true;
+    }
+
+    @Override
+    public Optional<InputStream> next() throws IOException {
+        if (!hasNext()) {
+            throw new NoSuchElementException();
+        }
+        Optional<InputStream> stream = scrollId == null ? Optional.ofNullable(initScroll()) : Optional.ofNullable(continueScroll());
+        if (!stream.isPresent()) {
+            hasNext = false;
+        }
+        return stream;
+    }
+
+    private InputStream initScroll() throws IOException {
+        SearchResponse searchResponse = executeSearchRequest(buildSearchRequest());
+        if (searchResponse.status() != RestStatus.OK) {
+            throw new IOException("[" + context.jobId + "] Search request returned status code: " + searchResponse.status()
+                    + ". Response was:\n" + searchResponse.toString());
+        }
+        return processSearchResponse(searchResponse);
+    }
+
+    protected SearchResponse executeSearchRequest(SearchRequestBuilder searchRequestBuilder) {
+        return searchRequestBuilder.get();
+    }
+
+    private SearchRequestBuilder buildSearchRequest() {
+        SearchRequestBuilder searchRequestBuilder = SearchAction.INSTANCE.newRequestBuilder(client)
+                .setScroll(SCROLL_TIMEOUT)
+                .setSearchType(SearchType.QUERY_AND_FETCH)
+                .addSort(context.timeField, SortOrder.ASC)
+                .setIndices(context.indexes)
+                .setTypes(context.types)
+                .setSize(context.scrollSize)
+                .setQuery(createQuery());
+        if (context.aggregations != null) {
+            searchRequestBuilder.setSize(0);
+            for (AggregationBuilder aggregationBuilder : context.aggregations.getAggregatorFactories()) {
+                searchRequestBuilder.addAggregation(aggregationBuilder);
+            }
+            for (PipelineAggregationBuilder pipelineAggregationBuilder : context.aggregations.getPipelineAggregatorFactories()) {
+                searchRequestBuilder.addAggregation(pipelineAggregationBuilder);
+            }
+        }
+        for (SearchSourceBuilder.ScriptField scriptField : context.scriptFields) {
+            searchRequestBuilder.addScriptField(scriptField.fieldName(), scriptField.script());
+        }
+        return searchRequestBuilder;
+    }
+
+    private InputStream processSearchResponse(SearchResponse searchResponse) throws IOException {
+        scrollId = searchResponse.getScrollId();
+        if (searchResponse.getHits().hits().length == 0) {
+            hasNext = false;
+            return null;
+        }
+
+        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+        try (SearchHitToJsonProcessor hitProcessor = new SearchHitToJsonProcessor(context.jobFields, outputStream)) {
+            for (SearchHit hit : searchResponse.getHits().hits()) {
+                if (isCancelled) {
+                    Long timestamp = SearchHitFieldExtractor.extractTimeField(hit, context.timeField);
+                    if (timestamp != null) {
+                        if (timestampOnCancel == null) {
+                            timestampOnCancel = timestamp;
+                        } else if (timestamp != timestampOnCancel) {
+                            hasNext = false;
+                            clearScroll(scrollId);
+                            break;
+                        }
+                    }
+                }
+                hitProcessor.process(hit);
+            }
+        }
+        return new ByteArrayInputStream(outputStream.toByteArray());
+    }
+
+    private InputStream continueScroll() throws IOException {
+        SearchResponse searchResponse = executeSearchScrollRequest(scrollId);
+        if (searchResponse.status() != RestStatus.OK) {
+            throw new IOException("[" + context.jobId + "] Continue search scroll request with id '" + scrollId + "' returned status code: "
+                    + searchResponse.status() + ". Response was:\n" + searchResponse.toString());
+        }
+        return processSearchResponse(searchResponse);
+    }
+
+    protected SearchResponse executeSearchScrollRequest(String scrollId) {
+        return SearchScrollAction.INSTANCE.newRequestBuilder(client)
+                .setScroll(SCROLL_TIMEOUT)
+                .setScrollId(scrollId)
+                .get();
+    }
+
+    private QueryBuilder createQuery() {
+        QueryBuilder userQuery = context.query;
+        QueryBuilder timeQuery = new RangeQueryBuilder(context.timeField).gte(context.start).lt(context.end).format("epoch_millis");
+        return new BoolQueryBuilder().filter(userQuery).filter(timeQuery);
+    }
+
+    void clearScroll(String scrollId) {
+        ClearScrollAction.INSTANCE.newRequestBuilder(client).addScrollId(scrollId).get();
+    }
+}
@@ -0,0 +1,46 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.prelert.scheduler.extractor.scroll;
+
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.search.aggregations.AggregatorFactories;
+import org.elasticsearch.search.builder.SearchSourceBuilder;
+
+import java.util.List;
+import java.util.Objects;
+
+public class ScrollDataExtractorContext {
+
+    final String jobId;
+    final String[] jobFields;
+    final String timeField;
+    final String[] indexes;
+    final String[] types;
+    final QueryBuilder query;
+    @Nullable
+    final AggregatorFactories.Builder aggregations;
+    final List<SearchSourceBuilder.ScriptField> scriptFields;
+    final int scrollSize;
+    final long start;
+    final long end;
+
+    public ScrollDataExtractorContext(String jobId, List<String> jobFields, String timeField, List<String> indexes, List<String> types,
+                                      QueryBuilder query, @Nullable AggregatorFactories.Builder aggregations,
+                                      List<SearchSourceBuilder.ScriptField> scriptFields, int scrollSize, long start, long end) {
+        this.jobId = Objects.requireNonNull(jobId);
+        this.jobFields = jobFields.toArray(new String[jobFields.size()]);
+        this.timeField = Objects.requireNonNull(timeField);
+        this.indexes = indexes.toArray(new String[indexes.size()]);
+        this.types = types.toArray(new String[types.size()]);
+        this.query = Objects.requireNonNull(query);
+        this.aggregations = aggregations;
+        this.scriptFields = Objects.requireNonNull(scriptFields);
+        this.scrollSize = scrollSize;
+        this.start = start;
+        this.end = end;
+    }
+}
@@ -0,0 +1,45 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.prelert.scheduler.extractor.scroll;
+
+import org.elasticsearch.client.Client;
+import org.elasticsearch.xpack.prelert.job.Job;
+import org.elasticsearch.xpack.prelert.scheduler.SchedulerConfig;
+import org.elasticsearch.xpack.prelert.scheduler.extractor.DataExtractor;
+import org.elasticsearch.xpack.prelert.scheduler.extractor.DataExtractorFactory;
+
+import java.util.Objects;
+
+public class ScrollDataExtractorFactory implements DataExtractorFactory {
+
+    private final Client client;
+    private final SchedulerConfig schedulerConfig;
+    private final Job job;
+
+    public ScrollDataExtractorFactory(Client client, SchedulerConfig schedulerConfig, Job job) {
+        this.client = Objects.requireNonNull(client);
+        this.schedulerConfig = Objects.requireNonNull(schedulerConfig);
+        this.job = Objects.requireNonNull(job);
+    }
+
+    @Override
+    public DataExtractor newExtractor(long start, long end) {
+        String timeField = job.getDataDescription().getTimeField();
+        ScrollDataExtractorContext dataExtractorContext = new ScrollDataExtractorContext(
+                job.getId(),
+                job.allFields(),
+                timeField,
+                schedulerConfig.getIndexes(),
+                schedulerConfig.getTypes(),
+                schedulerConfig.getQuery(),
+                schedulerConfig.getAggregations(),
+                schedulerConfig.getScriptFields(),
+                schedulerConfig.getScrollSize(),
+                start,
+                end);
+        return new ScrollDataExtractor(client, dataExtractorContext);
+    }
+}
@ -1,250 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
|
||||||
* or more contributor license agreements. Licensed under the Elastic License;
|
|
||||||
* you may not use this file except in compliance with the Elastic License.
|
|
||||||
*/
|
|
||||||
package org.elasticsearch.xpack.prelert.scheduler.http;
|
|
||||||
|
|
||||||
import org.apache.logging.log4j.Logger;
|
|
||||||
import org.elasticsearch.xpack.prelert.job.extraction.DataExtractor;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.NoSuchElementException;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
public class ElasticsearchDataExtractor implements DataExtractor {
|
|
||||||
|
|
||||||
private static final String CLEAR_SCROLL_TEMPLATE = "{\"scroll_id\":[\"%s\"]}";
|
|
||||||
private static final Pattern TOTAL_HITS_PATTERN = Pattern.compile("\"hits\":\\{.*?\"total\":(.*?),");
|
|
||||||
private static final Pattern EARLIEST_TIME_PATTERN = Pattern.compile("\"earliestTime\":\\{.*?\"value\":(.*?),");
|
|
||||||
private static final Pattern LATEST_TIME_PATTERN = Pattern.compile("\"latestTime\":\\{.*?\"value\":(.*?),");
|
|
||||||
private static final Pattern INDEX_PATTERN = Pattern.compile("\"_index\":\"(.*?)\"");
|
|
||||||
private static final Pattern NUMBER_OF_SHARDS_PATTERN = Pattern.compile("\"number_of_shards\":\"(.*?)\"");
|
|
||||||
private static final long CHUNK_THRESHOLD_MS = 3600000;
|
|
||||||
private static final long MIN_CHUNK_SIZE_MS = 10000L;
|
|
||||||
|
|
||||||
private final HttpRequester httpRequester;
|
|
||||||
private final ElasticsearchUrlBuilder urlBuilder;
|
|
||||||
private final ElasticsearchQueryBuilder queryBuilder;
|
|
||||||
private final int scrollSize;
|
|
||||||
private final ScrollState scrollState;
|
|
||||||
private volatile long currentStartTime;
|
|
||||||
private volatile long currentEndTime;
|
|
||||||
private volatile long endTime;
|
|
||||||
private volatile boolean isFirstSearch;
|
|
||||||
private volatile boolean isCancelled;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The interval of each scroll search. Will be null when search is not chunked.
|
|
||||||
*/
|
|
||||||
private volatile Long m_Chunk;
|
|
||||||
|
|
||||||
private volatile Logger m_Logger;
|
|
||||||
|
|
||||||
public ElasticsearchDataExtractor(HttpRequester httpRequester, ElasticsearchUrlBuilder urlBuilder,
|
|
||||||
ElasticsearchQueryBuilder queryBuilder, int scrollSize) {
|
|
||||||
this.httpRequester = Objects.requireNonNull(httpRequester);
|
|
||||||
this.urlBuilder = Objects.requireNonNull(urlBuilder);
|
|
||||||
this.scrollSize = scrollSize;
|
|
||||||
this.queryBuilder = Objects.requireNonNull(queryBuilder);
|
|
||||||
scrollState = queryBuilder.isAggregated() ? ScrollState.createAggregated() : ScrollState.createDefault();
|
|
||||||
isFirstSearch = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void newSearch(long startEpochMs, long endEpochMs, Logger logger) throws IOException {
|
|
||||||
m_Logger = logger;
|
|
||||||
m_Logger.info("Requesting data from '" + urlBuilder.getBaseUrl() + "' within [" + startEpochMs + ", " + endEpochMs + ")");
|
|
||||||
scrollState.reset();
|
|
||||||
currentStartTime = startEpochMs;
|
|
||||||
currentEndTime = endEpochMs;
|
|
||||||
endTime = endEpochMs;
|
|
||||||
isCancelled = false;
|
|
||||||
if (endEpochMs - startEpochMs > CHUNK_THRESHOLD_MS) {
|
|
||||||
setUpChunkedSearch();
|
|
||||||
}
|
|
||||||
if (isFirstSearch) {
|
|
||||||
queryBuilder.logQueryInfo(m_Logger);
|
|
||||||
isFirstSearch = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void setUpChunkedSearch() throws IOException {
|
|
||||||
m_Chunk = null;
|
|
||||||
String url = urlBuilder.buildSearchSizeOneUrl();
|
|
||||||
String response = requestAndGetStringResponse(url, queryBuilder.createDataSummaryQuery(currentStartTime, endTime));
|
|
||||||
long totalHits = matchLong(response, TOTAL_HITS_PATTERN);
|
|
||||||
if (totalHits > 0) {
|
|
||||||
// Aggregation value may be a double
|
|
||||||
currentStartTime = (long) matchDouble(response, EARLIEST_TIME_PATTERN);
|
|
||||||
long latestTime = (long) matchDouble(response, LATEST_TIME_PATTERN);
|
|
||||||
long dataTimeSpread = latestTime - currentStartTime;
|
|
||||||
String index = matchString(response, INDEX_PATTERN);
|
|
||||||
long shards = readNumberOfShards(index);
|
|
||||||
m_Chunk = Math.max(MIN_CHUNK_SIZE_MS, (shards * scrollSize * dataTimeSpread) / totalHits);
|
|
||||||
currentEndTime = currentStartTime + m_Chunk;
|
|
||||||
m_Logger.debug("Chunked search configured: totalHits = " + totalHits
|
|
||||||
+ ", dataTimeSpread = " + dataTimeSpread + " ms, chunk size = " + m_Chunk
|
|
||||||
+ " ms");
|
|
||||||
} else {
|
|
||||||
currentStartTime = endTime;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private String requestAndGetStringResponse(String url, String body) throws IOException {
|
|
||||||
m_Logger.trace("url ={}, body={}", url, body);
|
|
||||||
HttpResponse response = httpRequester.get(url, body);
|
|
||||||
if (response.getResponseCode() != HttpResponse.OK_STATUS) {
|
|
||||||
throw new IOException("Request '" + url + "' failed with status code: "
|
|
||||||
+ response.getResponseCode() + ". Response was:\n" + response.getResponseAsString());
|
|
||||||
}
|
|
||||||
return response.getResponseAsString();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static long matchLong(String response, Pattern pattern) throws IOException {
|
|
||||||
String match = matchString(response, pattern);
|
|
||||||
try {
|
|
||||||
return Long.parseLong(match);
|
|
||||||
} catch (NumberFormatException e) {
|
|
||||||
throw new IOException("Failed to parse long from pattern '" + pattern + "'. Response was:\n" + response, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static double matchDouble(String response, Pattern pattern) throws IOException {
|
|
||||||
String match = matchString(response, pattern);
|
|
||||||
try {
|
|
||||||
return Double.parseDouble(match);
|
|
||||||
} catch (NumberFormatException e) {
|
|
||||||
throw new IOException("Failed to parse double from pattern '" + pattern + "'. Response was:\n" + response, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String matchString(String response, Pattern pattern) throws IOException {
|
|
||||||
Matcher matcher = pattern.matcher(response);
|
|
||||||
if (!matcher.find()) {
|
|
||||||
throw new IOException("Failed to parse string from pattern '" + pattern + "'. Response was:\n" + response);
|
|
||||||
}
|
|
||||||
return matcher.group(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
private long readNumberOfShards(String index) throws IOException {
|
|
||||||
String url = urlBuilder.buildIndexSettingsUrl(index);
|
|
||||||
String response = requestAndGetStringResponse(url, null);
|
|
||||||
return matchLong(response, NUMBER_OF_SHARDS_PATTERN);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void clear() {
|
|
||||||
scrollState.reset();
|
|
||||||
}
|
|
||||||
|
|
||||||
    @Override
    public Optional<InputStream> next() throws IOException {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        try {
            return getNextStream();
        } catch (IOException e) {
            m_Logger.error("An error occurred during requesting data from: " + urlBuilder.getBaseUrl(), e);
            scrollState.forceComplete();
            throw e;
        }
    }

    private Optional<InputStream> getNextStream() throws IOException {
        while (hasNext()) {
            boolean isNewScroll = scrollState.getScrollId() == null || scrollState.isComplete();
            InputStream stream = isNewScroll ? initScroll() : continueScroll();
            stream = scrollState.updateFromStream(stream);
            if (scrollState.isComplete()) {
                clearScroll();
                advanceTime();

                // If it was a new scroll it means it returned 0 hits. If we are doing
                // a chunked search, we reconfigure the search in order to jump to the next
                // time interval where there are data.
                if (isNewScroll && hasNext() && !isCancelled && m_Chunk != null) {
                    setUpChunkedSearch();
                }
            } else {
                return Optional.of(stream);
            }
        }
        return Optional.empty();
    }

    private void clearScroll() {
        if (scrollState.getScrollId() == null) {
            return;
        }

        String url = urlBuilder.buildClearScrollUrl();
        try {
            HttpResponse response = httpRequester.delete(url, String.format(Locale.ROOT, CLEAR_SCROLL_TEMPLATE, scrollState.getScrollId()));

            // This is necessary to ensure the response stream has been consumed entirely.
            // Failing to do this can cause a lot of issues with Elasticsearch when
            // scheduled jobs are running concurrently.
            response.getResponseAsString();
        } catch (IOException e) {
            m_Logger.error("An error ocurred during clearing scroll context", e);
        }
        scrollState.clearScrollId();
    }

    @Override
    public boolean hasNext() {
        return !scrollState.isComplete() || (!isCancelled && currentStartTime < endTime);
    }

    private InputStream initScroll() throws IOException {
        String url = buildInitScrollUrl();
        String searchBody = queryBuilder.createSearchBody(currentStartTime, currentEndTime);
        m_Logger.trace("About to submit body " + searchBody + " to URL " + url);
        HttpResponse response = httpRequester.get(url, searchBody);
        if (response.getResponseCode() != HttpResponse.OK_STATUS) {
            throw new IOException("Request '" + url + "' failed with status code: "
                    + response.getResponseCode() + ". Response was:\n" + response.getResponseAsString());
        }
        return response.getStream();
    }

    private void advanceTime() {
        currentStartTime = currentEndTime;
        currentEndTime = m_Chunk == null ? endTime : Math.min(currentStartTime + m_Chunk, endTime);
    }

    private String buildInitScrollUrl() throws IOException {
        // With aggregations we don't want any hits returned for the raw data,
        // just the aggregations
        int size = queryBuilder.isAggregated() ? 0 : scrollSize;
        return urlBuilder.buildInitScrollUrl(size);
    }

    private InputStream continueScroll() throws IOException {
        // Aggregations never need a continuation
        if (!queryBuilder.isAggregated()) {
            String url = urlBuilder.buildContinueScrollUrl();
            HttpResponse response = httpRequester.get(url, scrollState.getScrollId());
            if (response.getResponseCode() == HttpResponse.OK_STATUS) {
                return response.getStream();
            }
            throw new IOException("Request '" + url + "' with scroll id '"
                    + scrollState.getScrollId() + "' failed with status code: "
                    + response.getResponseCode() + ". Response was:\n"
                    + response.getResponseAsString());
        }
        return null;
    }

    @Override
    public void cancel() {
        isCancelled = true;
    }
}
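For orientation, here is a minimal sketch of how a consumer could drive the search-and-scroll extractor above: loop on hasNext()/next() until the scroll completes, closing each returned stream, and call cancel() to stop early. Only hasNext(), next() and cancel() come from the class itself; the pumpExtractor name and the processStream helper are hypothetical.

    // Hypothetical consumer loop (sketch only); processStream stands in for whatever
    // parses the returned hits and posts them to the job.
    void pumpExtractor(ElasticsearchDataExtractor extractor) throws IOException {
        while (extractor.hasNext()) {
            Optional<InputStream> stream = extractor.next();
            if (stream.isPresent()) {
                try (InputStream in = stream.get()) {
                    processStream(in);
                }
            }
        }
    }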
@ -1,129 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.prelert.scheduler.http;

import org.apache.logging.log4j.Logger;

import java.time.Instant;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Locale;
import java.util.Objects;

public class ElasticsearchQueryBuilder {

    /**
     * The search body for Elasticsearch version 2.x contains sorting
     * based on the time field and a query. The query is composed by
     * a bool query with two must clauses, the recommended way to perform an AND query.
     * There are 6 placeholders:
     * <ol>
     * <li> sort field
     * <li> user defined query
     * <li> time field
     * <li> start time (String in date_time format)
     * <li> end time (String in date_time format)
     * <li> extra (may be empty or contain aggregations, fields, etc.)
     * </ol>
     */
    private static final String SEARCH_BODY_TEMPLATE_2_X = "{"
            + "\"sort\": ["
            + "{\"%s\": {\"order\": \"asc\"}}"
            + "],"
            + "\"query\": {"
            + "\"bool\": {"
            + "\"filter\": ["
            + "%s,"
            + "{"
            + "\"range\": {"
            + "\"%s\": {"
            + "\"gte\": \"%s\","
            + "\"lt\": \"%s\","
            + "\"format\": \"date_time\""
            + "}"
            + "}"
            + "}"
            + "]"
            + "}"
            + "}%s"
            + "}";

    private static final String DATA_SUMMARY_SORT_FIELD = "_doc";

    /**
     * Aggregations in order to retrieve the earliest and latest record times.
     * The single placeholder expects the time field.
     */
    private static final String DATA_SUMMARY_AGGS_TEMPLATE = ""
            + "{"
            + "\"earliestTime\":{"
            + "\"min\":{\"field\":\"%1$s\"}"
            + "},"
            + "\"latestTime\":{"
            + "\"max\":{\"field\":\"%1$s\"}"
            + "}"
            + "}";

    private static final String AGGREGATION_TEMPLATE = ", \"aggs\": %s";
    private static final String SCRIPT_FIELDS_TEMPLATE = ", \"script_fields\": %s";

    private final String search;
    private final String aggregations;
    private final String scriptFields;
    private final String timeField;

    public ElasticsearchQueryBuilder(String search, String aggs, String scriptFields, String timeField) {
        this.search = Objects.requireNonNull(search);
        aggregations = aggs;
        this.scriptFields = scriptFields;
        this.timeField = Objects.requireNonNull(timeField);
    }

    public String createSearchBody(long start, long end) {
        return createSearchBody(start, end, timeField, aggregations);
    }

    private String createSearchBody(long start, long end, String sortField, String aggs) {
        return String.format(Locale.ROOT, SEARCH_BODY_TEMPLATE_2_X, sortField, search, timeField, formatAsDateTime(start),
                formatAsDateTime(end), createResultsFormatSpec(aggs));
    }

    private static String formatAsDateTime(long epochMs) {
        Instant instant = Instant.ofEpochMilli(epochMs);
        ZonedDateTime dateTime = ZonedDateTime.ofInstant(instant, ZoneOffset.UTC);
        return dateTime.format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSXXX", Locale.ROOT));
    }

    private String createResultsFormatSpec(String aggs) {
        return (aggs != null) ? createAggregations(aggs) : createScriptFields();
    }

    private String createAggregations(String aggs) {
        return String.format(Locale.ROOT, AGGREGATION_TEMPLATE, aggs);
    }

    private String createScriptFields() {
        return (scriptFields != null) ? String.format(Locale.ROOT, SCRIPT_FIELDS_TEMPLATE, scriptFields) : "";
    }

    public String createDataSummaryQuery(long start, long end) {
        String aggs = String.format(Locale.ROOT, DATA_SUMMARY_AGGS_TEMPLATE, timeField);
        return createSearchBody(start, end, DATA_SUMMARY_SORT_FIELD, aggs);
    }

    public void logQueryInfo(Logger logger) {
        if (aggregations != null) {
            logger.debug("Will use the following Elasticsearch aggregations: " + aggregations);
        } else {
            logger.debug("Will retrieve whole _source document from Elasticsearch");
        }
    }

    public boolean isAggregated() {
        return aggregations != null;
    }
}
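To make the template above concrete, a small usage sketch (not from the changed files); the query, time field and epoch-millisecond timestamps are illustrative values.

    // Sketch: fill SEARCH_BODY_TEMPLATE_2_X via the public API of the class above.
    ElasticsearchQueryBuilder queryBuilder = new ElasticsearchQueryBuilder(
            "{\"match_all\":{}}",   // user defined query
            null,                    // no aggregations
            null,                    // no script fields
            "@timestamp");           // time field
    String body = queryBuilder.createSearchBody(1420070400000L, 1420074000000L);
    // body sorts on @timestamp ascending and wraps the query in a bool filter with a range
    // on @timestamp from "2015-01-01T00:00:00.000Z" (gte) to "2015-01-01T01:00:00.000Z" (lt).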
@ -1,86 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.prelert.scheduler.http;

import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.stream.Collectors;

public class ElasticsearchUrlBuilder {

    private static final String SLASH = "/";
    private static final String COMMA = ",";
    private static final int SCROLL_CONTEXT_MINUTES = 60;
    private static final String INDEX_SETTINGS_END_POINT = "%s/_settings";
    private static final String SEARCH_SIZE_ONE_END_POINT = "_search?size=1";
    private static final String SEARCH_SCROLL_END_POINT = "_search?scroll=" + SCROLL_CONTEXT_MINUTES + "m&size=%d";
    private static final String CONTINUE_SCROLL_END_POINT = "_search/scroll?scroll=" + SCROLL_CONTEXT_MINUTES + "m";
    private static final String CLEAR_SCROLL_END_POINT = "_search/scroll";

    private final String baseUrl;
    private final String indexes;
    private final String types;

    private ElasticsearchUrlBuilder(String baseUrl, String indexes, String types) {
        this.baseUrl = Objects.requireNonNull(baseUrl);
        this.indexes = Objects.requireNonNull(indexes);
        this.types = Objects.requireNonNull(types);
    }

    public static ElasticsearchUrlBuilder create(List<String> indexes, List<String> types) {
        // norelease: This class will be removed once we switch to a client based data extractor
        return create(indexes, types, "http://localhost:9200/");
    }

    public static ElasticsearchUrlBuilder create(List<String> indexes, List<String> types, String baseUrl) {
        String indexesAsString = indexes.stream().collect(Collectors.joining(COMMA));
        String typesAsString = types.stream().collect(Collectors.joining(COMMA));
        return new ElasticsearchUrlBuilder(baseUrl, indexesAsString, typesAsString);
    }

    public String buildIndexSettingsUrl(String index) {
        return newUrlBuilder().append(String.format(Locale.ROOT, INDEX_SETTINGS_END_POINT, index)).toString();
    }

    public String buildSearchSizeOneUrl() {
        return buildUrlWithIndicesAndTypes().append(SEARCH_SIZE_ONE_END_POINT).toString();
    }

    public String buildInitScrollUrl(int scrollSize) {
        return buildUrlWithIndicesAndTypes()
                .append(String.format(Locale.ROOT, SEARCH_SCROLL_END_POINT, scrollSize))
                .toString();
    }

    public String buildContinueScrollUrl() {
        return newUrlBuilder().append(CONTINUE_SCROLL_END_POINT).toString();
    }

    public String buildClearScrollUrl() {
        return newUrlBuilder().append(CLEAR_SCROLL_END_POINT).toString();
    }

    private StringBuilder newUrlBuilder() {
        return new StringBuilder(baseUrl);
    }

    private StringBuilder buildUrlWithIndicesAndTypes() {
        StringBuilder urlBuilder = buildUrlWithIndices();
        if (!types.isEmpty()) {
            urlBuilder.append(types).append(SLASH);
        }
        return urlBuilder;
    }

    private StringBuilder buildUrlWithIndices() {
        return newUrlBuilder().append(indexes).append(SLASH);
    }

    public String getBaseUrl() {
        return baseUrl;
    }
}
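A short sketch of the URLs the class above produces, derived from its endpoint constants; the index and type names are illustrative.

    // Sketch: URLs built for a single index/type against a local node.
    ElasticsearchUrlBuilder urlBuilder = ElasticsearchUrlBuilder.create(
            Arrays.asList("logs-2016.11"), Arrays.asList("access"), "http://localhost:9200/");
    urlBuilder.buildInitScrollUrl(1000);  // http://localhost:9200/logs-2016.11/access/_search?scroll=60m&size=1000
    urlBuilder.buildContinueScrollUrl();  // http://localhost:9200/_search/scroll?scroll=60m
    urlBuilder.buildClearScrollUrl();     // http://localhost:9200/_search/scroll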
@ -1,112 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.prelert.scheduler.http;

import org.apache.logging.log4j.Logger;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoAction;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoRequest;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.search.SearchRequestParsers;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.xpack.prelert.job.Job;
import org.elasticsearch.xpack.prelert.job.extraction.DataExtractor;
import org.elasticsearch.xpack.prelert.job.extraction.DataExtractorFactory;
import org.elasticsearch.xpack.prelert.scheduler.SchedulerConfig;
import org.elasticsearch.xpack.prelert.utils.FixBlockingClientOperations;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Objects;

public class HttpDataExtractorFactory implements DataExtractorFactory {

    private static final Logger LOGGER = Loggers.getLogger(HttpDataExtractorFactory.class);

    private final Client client;
    private final SearchRequestParsers searchRequestParsers;

    public HttpDataExtractorFactory(Client client, SearchRequestParsers searchRequestParsers) {
        this.client = Objects.requireNonNull(client);
        this.searchRequestParsers = Objects.requireNonNull(searchRequestParsers);
    }

    @Override
    public DataExtractor newExtractor(SchedulerConfig schedulerConfig, Job job) {
        String timeField = job.getDataDescription().getTimeField();
        ElasticsearchQueryBuilder queryBuilder = new ElasticsearchQueryBuilder(
                xContentToJson(schedulerConfig.getQuery()),
                stringifyAggregations(schedulerConfig.getAggregations()),
                stringifyScriptFields(schedulerConfig.getScriptFields()),
                timeField);
        HttpRequester httpRequester = new HttpRequester();
        ElasticsearchUrlBuilder urlBuilder = ElasticsearchUrlBuilder
                .create(schedulerConfig.getIndexes(), schedulerConfig.getTypes(), getBaseUrl());
        return new ElasticsearchDataExtractor(httpRequester, urlBuilder, queryBuilder, schedulerConfig.getScrollSize());
    }

    private String getBaseUrl() {
        NodesInfoRequest request = new NodesInfoRequest();
        NodesInfoResponse nodesInfoResponse = FixBlockingClientOperations.executeBlocking(client, NodesInfoAction.INSTANCE, request);
        TransportAddress address = nodesInfoResponse.getNodes().get(0).getHttp().getAddress().publishAddress();
        String baseUrl = "http://" + address.getAddress() + ":" + address.getPort() + "/";
        LOGGER.info("Base URL: " + baseUrl);
        return baseUrl;
    }

    private String xContentToJson(ToXContent xContent) {
        try {
            XContentBuilder jsonBuilder = JsonXContent.contentBuilder();
            xContent.toXContent(jsonBuilder, ToXContent.EMPTY_PARAMS);
            return jsonBuilder.string();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    String stringifyAggregations(AggregatorFactories.Builder aggregations) {
        if (aggregations == null) {
            return null;
        }
        return xContentToJson(aggregations);
    }

    String stringifyScriptFields(List<SearchSourceBuilder.ScriptField> scriptFields) {
        if (scriptFields.isEmpty()) {
            return null;
        }
        try {
            XContentBuilder jsonBuilder = JsonXContent.contentBuilder();
            jsonBuilder.startObject();
            for (SearchSourceBuilder.ScriptField scriptField : scriptFields) {
                scriptField.toXContent(jsonBuilder, ToXContent.EMPTY_PARAMS);
            }
            jsonBuilder.endObject();
            return jsonBuilder.string();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    private static String writeMapAsJson(Map<String, Object> map) {
        try {
            XContentBuilder builder = XContentFactory.jsonBuilder();
            builder.map(map);
            return builder.string();
        } catch (IOException e) {
            throw new ElasticsearchParseException("failed to convert map to JSON string", e);
        }
    }
}
@ -1,160 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.prelert.scheduler.http;

import org.apache.logging.log4j.Logger;
import org.elasticsearch.common.logging.Loggers;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.io.DataOutputStream;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;

/**
 * Executes requests from an HTTP or HTTPS URL by sending a request body.
 * HTTP or HTTPS is deduced from the supplied URL.
 * Invalid certificates are tolerated for HTTPS access, similar to "curl -k".
 */
public class HttpRequester {

    private static final Logger LOGGER = Loggers.getLogger(HttpRequester.class);

    private static final String TLS = "TLS";
    private static final String GET = "GET";
    private static final String DELETE = "DELETE";
    private static final String AUTH_HEADER = "Authorization";
    private static final String CONTENT_TYPE_HEADER = "Content-Type";

    private static final SSLSocketFactory TRUSTING_SOCKET_FACTORY;
    private static final HostnameVerifier TRUSTING_HOSTNAME_VERIFIER;

    private static final int CONNECT_TIMEOUT_MILLIS = 30000;
    private static final int READ_TIMEOUT_MILLIS = 600000;

    static {
        SSLSocketFactory trustingSocketFactory = null;
        try {
            SSLContext sslContext = SSLContext.getInstance(TLS);
            sslContext.init(null, new TrustManager[]{ new NoOpTrustManager() }, null);
            trustingSocketFactory = sslContext.getSocketFactory();
        } catch (KeyManagementException | NoSuchAlgorithmException e) {
            LOGGER.warn("Unable to set up trusting socket factory", e);
        }

        TRUSTING_SOCKET_FACTORY = trustingSocketFactory;
        TRUSTING_HOSTNAME_VERIFIER = new NoOpHostnameVerifier();
    }

    private final String authHeader;
    private final String contentTypeHeader;

    public HttpRequester() {
        this(null);
    }

    public HttpRequester(String authHeader) {
        this(authHeader, null);
    }

    public HttpRequester(String authHeader, String contentTypeHeader) {
        this.authHeader = authHeader;
        this.contentTypeHeader = contentTypeHeader;
    }

    public HttpResponse get(String url, String requestBody) throws IOException {
        return request(url, requestBody, GET);
    }

    public HttpResponse delete(String url, String requestBody) throws IOException {
        return request(url, requestBody, DELETE);
    }

    private HttpResponse request(String url, String requestBody, String method) throws IOException {
        URL urlObject = new URL(url);
        HttpURLConnection connection = (HttpURLConnection) urlObject.openConnection();
        connection.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
        connection.setReadTimeout(READ_TIMEOUT_MILLIS);

        // TODO: we could add a config option to allow users who want to
        // rigorously enforce valid certificates to do so
        if (connection instanceof HttpsURLConnection) {
            // This is the equivalent of "curl -k", i.e. tolerate connecting to
            // an Elasticsearch with a self-signed certificate or a certificate
            // that doesn't match its hostname.
            HttpsURLConnection httpsConnection = (HttpsURLConnection)connection;
            if (TRUSTING_SOCKET_FACTORY != null) {
                httpsConnection.setSSLSocketFactory(TRUSTING_SOCKET_FACTORY);
            }
            httpsConnection.setHostnameVerifier(TRUSTING_HOSTNAME_VERIFIER);
        }
        connection.setRequestMethod(method);
        if (authHeader != null) {
            connection.setRequestProperty(AUTH_HEADER, authHeader);
        }
        if (contentTypeHeader != null) {
            connection.setRequestProperty(CONTENT_TYPE_HEADER, contentTypeHeader);
        }
        if (requestBody != null) {
            connection.setDoOutput(true);
            writeRequestBody(requestBody, connection);
        }
        if (connection.getResponseCode() != HttpResponse.OK_STATUS) {
            return new HttpResponse(connection.getErrorStream(), connection.getResponseCode());
        }
        return new HttpResponse(connection.getInputStream(), connection.getResponseCode());
    }

    private static void writeRequestBody(String requestBody, HttpURLConnection connection) throws IOException {
        DataOutputStream dataOutputStream = new DataOutputStream(connection.getOutputStream());
        dataOutputStream.writeBytes(requestBody);
        dataOutputStream.flush();
        dataOutputStream.close();
    }

    /**
     * Hostname verifier that ignores hostname discrepancies.
     */
    private static final class NoOpHostnameVerifier implements HostnameVerifier {
        @Override
        public boolean verify(String hostname, SSLSession session) {
            return true;
        }
    }

    /**
     * Certificate trust manager that ignores certificate issues.
     */
    private static final class NoOpTrustManager implements X509TrustManager {
        private static final X509Certificate[] EMPTY_CERTIFICATE_ARRAY = new X509Certificate[0];

        @Override
        public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
            // Ignore certificate problems
        }

        @Override
        public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
            // Ignore certificate problems
        }

        @Override
        public X509Certificate[] getAcceptedIssuers() {
            return EMPTY_CERTIFICATE_ARRAY;
        }
    }
}
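A usage sketch of the requester above, issuing the initial scroll search as a GET with a request body; the URL and the body variable are the illustrative values from the earlier sketches.

    // Sketch: GET with a body, as the extractor does for the initial scroll request.
    HttpRequester requester = new HttpRequester();
    HttpResponse response = requester.get(
            "http://localhost:9200/logs-2016.11/access/_search?scroll=60m&size=1000", body);
    if (response.getResponseCode() != HttpResponse.OK_STATUS) {
        // Draining the error stream keeps the keep-alive connection reusable.
        throw new IOException("search failed: " + response.getResponseAsString());
    }
    InputStream hits = response.getStream();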
@ -1,53 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.prelert.scheduler.http;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.stream.Collectors;

/**
 * Encapsulates the HTTP response stream and the status code.
 *
 * <p><b>Important note</b>: The stream has to be consumed thoroughly.
 * Java is keeping connections alive thus reusing them and any
 * streams with dangling data can lead to problems.
 */
class HttpResponse {

    public static final int OK_STATUS = 200;

    private static final String NEW_LINE = "\n";

    private final InputStream stream;
    private final int responseCode;

    public HttpResponse(InputStream responseStream, int responseCode) {
        stream = responseStream;
        this.responseCode = responseCode;
    }

    public int getResponseCode() {
        return responseCode;
    }

    public InputStream getStream() {
        return stream;
    }

    public String getResponseAsString() throws IOException {
        return getStreamAsString(stream);
    }

    public static String getStreamAsString(InputStream stream) throws IOException {
        try (BufferedReader buffer = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
            return buffer.lines().collect(Collectors.joining(NEW_LINE));
        }
    }
}
@ -1,166 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.prelert.scheduler.http;

import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Holds the state of an Elasticsearch scroll.
 */
class ScrollState {

    private static final Pattern SCROLL_ID_PATTERN = Pattern.compile("\"_scroll_id\":\"(.*?)\"");
    private static final Pattern DEFAULT_PEEK_END_PATTERN = Pattern.compile("\"hits\":\\[(.)");
    private static final Pattern AGGREGATED_PEEK_END_PATTERN = Pattern.compile("\"aggregations\":.*?\"buckets\":\\[(.)");
    private static final String CLOSING_SQUARE_BRACKET = "]";

    /**
     * We want to read up until the "hits" or "buckets" array. Scroll IDs can be
     * quite long in clusters that have many nodes/shards. The longest reported
     * scroll ID is 20708 characters - see
     * http://elasticsearch-users.115913.n3.nabble.com/Ridiculously-long-Scroll-id-td4038567.html
     * <br>
     * We set a max byte limit for the stream peeking to 1 MB.
     */
    private static final int MAX_PEEK_BYTES = 1024 * 1024;

    /**
     * We try to search for the scroll ID and whether the scroll is complete every time
     * we have read a chunk of size 32 KB.
     */
    private static final int PEEK_CHUNK_SIZE = 32 * 1024;

    private final int peekMaxSize;
    private final int peekChunkSize;
    private final Pattern peekEndPattern;
    private volatile String scrollId;
    private volatile boolean isComplete;

    private ScrollState(int peekMaxSize, int peekChunkSize, Pattern scrollCompletePattern) {
        this.peekMaxSize = peekMaxSize;
        this.peekChunkSize = peekChunkSize;
        peekEndPattern = Objects.requireNonNull(scrollCompletePattern);
    }

    /**
     * Creates a {@code ScrollState} for a search without aggregations
     * @return the {@code ScrollState}
     */
    public static ScrollState createDefault() {
        return new ScrollState(MAX_PEEK_BYTES, PEEK_CHUNK_SIZE, DEFAULT_PEEK_END_PATTERN);
    }

    /**
     * Creates a {@code ScrollState} for a search with aggregations
     * @return the {@code ScrollState}
     */
    public static ScrollState createAggregated() {
        return new ScrollState(MAX_PEEK_BYTES, PEEK_CHUNK_SIZE, AGGREGATED_PEEK_END_PATTERN);
    }

    public final void reset() {
        scrollId = null;
        isComplete = false;
    }

    public void clearScrollId() {
        scrollId = null;
    }

    public String getScrollId() {
        return scrollId;
    }

    public boolean isComplete() {
        return isComplete;
    }

    public void forceComplete() {
        isComplete = true;
    }

    /**
     * Peeks into the stream and updates the scroll ID and whether the scroll is complete.
     * <p>
     * <em>After calling that method the given stream cannot be reused.
     * Use the returned stream instead.</em>
     * </p>
     *
     * @param stream the stream
     * @return a new {@code InputStream} object which should be used instead of the given stream
     * for further processing
     * @throws IOException if an I/O error occurs while manipulating the stream or the stream
     * contains no scroll ID
     */
    public InputStream updateFromStream(InputStream stream) throws IOException {
        if (stream == null) {
            isComplete = true;
            return null;
        }

        PushbackInputStream pushbackStream = new PushbackInputStream(stream, peekMaxSize);
        byte[] buffer = new byte[peekMaxSize];
        int totalBytesRead = 0;
        int currentChunkSize = 0;

        int bytesRead = 0;
        while (bytesRead >= 0 && totalBytesRead < peekMaxSize) {
            bytesRead = stream.read(buffer, totalBytesRead, Math.min(peekChunkSize, peekMaxSize - totalBytesRead));
            if (bytesRead > 0) {
                totalBytesRead += bytesRead;
                currentChunkSize += bytesRead;
            }

            if (bytesRead < 0 || currentChunkSize >= peekChunkSize) {
                // We make the assumption here that invalid byte sequences will be read as invalid
                // char rather than throwing an exception
                String peekString = new String(buffer, 0, totalBytesRead, StandardCharsets.UTF_8);

                if (matchScrollState(peekString)) {
                    break;
                }
                currentChunkSize = 0;
            }
        }

        pushbackStream.unread(buffer, 0, totalBytesRead);

        if (scrollId == null) {
            throw new IOException("Field '_scroll_id' was expected but not found in first "
                    + totalBytesRead + " bytes of response:\n"
                    + HttpResponse.getStreamAsString(pushbackStream));
        }
        return pushbackStream;
    }

    /**
     * Searches the peek end pattern into the given {@code sequence}. If it is matched,
     * it also searches for the scroll ID and updates the state accordingly.
     *
     * @param sequence the String to search into
     * @return {@code true} if the peek end pattern was matched or {@code false} otherwise
     */
    private boolean matchScrollState(String sequence) {
        Matcher peekEndMatcher = peekEndPattern.matcher(sequence);
        if (peekEndMatcher.find()) {
            Matcher scrollIdMatcher = SCROLL_ID_PATTERN.matcher(sequence);
            if (!scrollIdMatcher.find()) {
                scrollId = null;
            } else {
                scrollId = scrollIdMatcher.group(1);
                isComplete = CLOSING_SQUARE_BRACKET.equals(peekEndMatcher.group(1));
                return true;
            }
        }
        return false;
    }
}
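To make the peeking logic above concrete, two illustrative response prefixes and how DEFAULT_PEEK_END_PATTERN classifies them; the scroll IDs and hit contents are made up.

    // Sketch: a page with hits -> the character after "hits":[ is '{', so the scroll continues.
    String page = "{\"_scroll_id\":\"c2Nhbjs1OzE6...\",\"hits\":{\"total\":2,\"hits\":[{\"_source\":{\"a\":1}}]}}";
    // Sketch: the final, empty page -> the character after "hits":[ is ']', so isComplete becomes true.
    String lastPage = "{\"_scroll_id\":\"c2Nhbjs1OzE6...\",\"hits\":{\"total\":2,\"hits\":[]}}";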
@ -5,16 +5,15 @@
 */
package org.elasticsearch.xpack.prelert.job.process.autodetect.writer;

+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonParser;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.prelert.job.status.CountingInputStream;
import org.elasticsearch.xpack.prelert.job.status.StatusReporter;

-import com.fasterxml.jackson.core.JsonFactory;
-import com.fasterxml.jackson.core.JsonParseException;
-import com.fasterxml.jackson.core.JsonParser;
-
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
@ -31,7 +30,7 @@ public class SimpleJsonRecordReaderTests extends ESTestCase {
        JsonParser parser = createParser(data);
        Map<String, Integer> fieldMap = createFieldMap();

-       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, "", mock(Logger.class));
+       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, mock(Logger.class));

        String record[] = new String[3];
        boolean gotFields[] = new boolean[3];
@ -58,7 +57,7 @@ public class SimpleJsonRecordReaderTests extends ESTestCase {
        fieldMap.put("b", 1);
        fieldMap.put("c.e", 2);

-       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, "", mock(Logger.class));
+       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, mock(Logger.class));

        String record[] = new String[3];
        boolean gotFields[] = new boolean[3];
@ -80,7 +79,7 @@ public class SimpleJsonRecordReaderTests extends ESTestCase {
        fieldMap.put("b", 1);
        fieldMap.put("c.e", 2);

-       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, "", mock(Logger.class));
+       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, mock(Logger.class));

        String record[] = new String[3];
        boolean gotFields[] = new boolean[3];
@ -102,7 +101,7 @@ public class SimpleJsonRecordReaderTests extends ESTestCase {
        fieldMap.put("g", 1);
        fieldMap.put("c.e", 2);

-       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, "", mock(Logger.class));
+       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, mock(Logger.class));

        String record[] = new String[3];
        boolean gotFields[] = new boolean[3];
@ -127,7 +126,7 @@ public class SimpleJsonRecordReaderTests extends ESTestCase {
        JsonParser parser = createParser(data);
        Map<String, Integer> fieldMap = createFieldMap();

-       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, "", mock(Logger.class));
+       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, mock(Logger.class));

        String record[] = new String[3];
        boolean gotFields[] = new boolean[3];
@ -149,7 +148,7 @@ public class SimpleJsonRecordReaderTests extends ESTestCase {
        JsonParser parser = createParser(data);
        Map<String, Integer> fieldMap = createFieldMap();

-       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, "", mock(Logger.class));
+       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, mock(Logger.class));

        String record[] = new String[3];
        boolean gotFields[] = new boolean[3];
@ -177,7 +176,7 @@ public class SimpleJsonRecordReaderTests extends ESTestCase {
        JsonParser parser = createParser(builder.toString());
        Map<String, Integer> fieldMap = createFieldMap();

-       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, "", mock(Logger.class));
+       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, mock(Logger.class));
        ESTestCase.expectThrows(ElasticsearchParseException.class, () -> readUntilError(reader));
    }
@ -191,34 +190,6 @@ public class SimpleJsonRecordReaderTests extends ESTestCase {
            }
        }

-   public void testRead_GivenDataEmbeddedInSource() throws Exception {
-       String data = "{\"took\": 1,\"hits\":{\"total\":1,\"hits\":["
-               + "{\"_index\":\"foo\",\"_source\":{\"a\":1,\"b\":2,\"c\":3}},"
-               + "{\"_index\":\"foo\",\"_source\":{\"a\":4,\"b\":5,\"c\":6}}"
-               + "]}}\n";
-       JsonParser parser = createParser(data);
-       Map<String, Integer> fieldMap = createFieldMap();
-
-       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, "_source", mock(Logger.class));
-
-       String record[] = new String[3];
-       boolean gotFields[] = new boolean[3];
-
-       assertEquals(3, reader.read(record, gotFields));
-       assertEquals("1", record[0]);
-       assertEquals("2", record[1]);
-       assertEquals("3", record[2]);
-
-       assertEquals(3, reader.read(record, gotFields));
-       assertEquals("4", record[0]);
-       assertEquals("5", record[1]);
-       assertEquals("6", record[2]);
-
-       assertEquals(-1, reader.read(record, gotFields));
-   }
-
    public void testRead_givenControlCharacterInData() throws Exception {
        char controlChar = '\u0002';
@ -229,7 +200,7 @@ public class SimpleJsonRecordReaderTests extends ESTestCase {
        JsonParser parser = createParser(data);
        Map<String, Integer> fieldMap = createFieldMap();

-       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, "", mock(Logger.class));
+       SimpleJsonRecordReader reader = new SimpleJsonRecordReader(parser, fieldMap, mock(Logger.class));

        String record[] = new String[3];
        boolean gotFields[] = new boolean[3];
@ -30,10 +30,10 @@ import org.elasticsearch.xpack.prelert.job.Detector;
import org.elasticsearch.xpack.prelert.job.Job;
import org.elasticsearch.xpack.prelert.job.JobStatus;
import org.elasticsearch.xpack.prelert.job.audit.Auditor;
-import org.elasticsearch.xpack.prelert.job.extraction.DataExtractor;
-import org.elasticsearch.xpack.prelert.job.extraction.DataExtractorFactory;
import org.elasticsearch.xpack.prelert.job.metadata.PrelertMetadata;
import org.elasticsearch.xpack.prelert.job.persistence.JobProvider;
+import org.elasticsearch.xpack.prelert.scheduler.extractor.DataExtractor;
+import org.elasticsearch.xpack.prelert.scheduler.extractor.DataExtractorFactory;
import org.junit.Before;

import java.io.ByteArrayInputStream;
@ -104,8 +104,12 @@ public class ScheduledJobRunnerTests extends ESTestCase {
        when(client.execute(same(PostDataAction.INSTANCE), any())).thenReturn(jobDataFuture);
        when(client.execute(same(FlushJobAction.INSTANCE), any())).thenReturn(flushJobFuture);

-       scheduledJobRunner = new ScheduledJobRunner(threadPool, client, clusterService, jobProvider, dataExtractorFactory,
-               () -> currentTime);
+       scheduledJobRunner = new ScheduledJobRunner(threadPool, client, clusterService, jobProvider, () -> currentTime) {
+           @Override
+           DataExtractorFactory createDataExtractorFactory(SchedulerConfig schedulerConfig, Job job) {
+               return dataExtractorFactory;
+           }
+       };

        when(jobProvider.audit(anyString())).thenReturn(auditor);
        doAnswer(invocationOnMock -> {
@ -131,7 +135,7 @@ public class ScheduledJobRunnerTests extends ESTestCase {
                .build());

        DataExtractor dataExtractor = mock(DataExtractor.class);
-       when(dataExtractorFactory.newExtractor(schedulerConfig, job)).thenReturn(dataExtractor);
+       when(dataExtractorFactory.newExtractor(0L, 60000L)).thenReturn(dataExtractor);
        when(dataExtractor.hasNext()).thenReturn(true).thenReturn(false);
        InputStream in = new ByteArrayInputStream("".getBytes(Charset.forName("utf-8")));
        when(dataExtractor.next()).thenReturn(Optional.of(in));
@ -140,7 +144,6 @@ public class ScheduledJobRunnerTests extends ESTestCase {
        StartSchedulerAction.SchedulerTask task = mock(StartSchedulerAction.SchedulerTask.class);
        scheduledJobRunner.run("scheduler1", 0L, 60000L, task, handler);

-       verify(dataExtractor).newSearch(eq(0L), eq(60000L), any());
        verify(threadPool, times(1)).executor(PrelertPlugin.SCHEDULED_RUNNER_THREAD_POOL_NAME);
        verify(threadPool, never()).schedule(any(), any(), any());
        verify(client).execute(same(PostDataAction.INSTANCE), eq(new PostDataAction.Request("foo")));
@ -164,7 +167,7 @@ public class ScheduledJobRunnerTests extends ESTestCase {
                .build());

        DataExtractor dataExtractor = mock(DataExtractor.class);
-       when(dataExtractorFactory.newExtractor(schedulerConfig, job)).thenReturn(dataExtractor);
+       when(dataExtractorFactory.newExtractor(0L, 60000L)).thenReturn(dataExtractor);
        when(dataExtractor.hasNext()).thenReturn(true).thenReturn(false);
        when(dataExtractor.next()).thenThrow(new RuntimeException("dummy"));
        when(jobDataFuture.get()).thenReturn(new PostDataAction.Response(dataCounts));
@ -172,7 +175,6 @@ public class ScheduledJobRunnerTests extends ESTestCase {
        StartSchedulerAction.SchedulerTask task = mock(StartSchedulerAction.SchedulerTask.class);
        scheduledJobRunner.run("scheduler1", 0L, 60000L, task, handler);

-       verify(dataExtractor).newSearch(eq(0L), eq(60000L), any());
        verify(threadPool, times(1)).executor(PrelertPlugin.SCHEDULED_RUNNER_THREAD_POOL_NAME);
        verify(threadPool, never()).schedule(any(), any(), any());
        verify(client, never()).execute(same(PostDataAction.INSTANCE), eq(new PostDataAction.Request("foo")));
@ -196,7 +198,7 @@ public class ScheduledJobRunnerTests extends ESTestCase {
                .build());

        DataExtractor dataExtractor = mock(DataExtractor.class);
-       when(dataExtractorFactory.newExtractor(schedulerConfig, job)).thenReturn(dataExtractor);
+       when(dataExtractorFactory.newExtractor(0L, 60000L)).thenReturn(dataExtractor);
        when(dataExtractor.hasNext()).thenReturn(true).thenReturn(false);
        InputStream in = new ByteArrayInputStream("".getBytes(Charset.forName("utf-8")));
        when(dataExtractor.next()).thenReturn(Optional.of(in));
@ -206,7 +208,6 @@ public class ScheduledJobRunnerTests extends ESTestCase {
        StartSchedulerAction.SchedulerTask task = new StartSchedulerAction.SchedulerTask(1, "type", "action", null, "scheduler1");
        scheduledJobRunner.run("scheduler1", 0L, null, task, handler);

-       verify(dataExtractor).newSearch(eq(0L), eq(60000L), any());
        verify(threadPool, times(1)).executor(PrelertPlugin.SCHEDULED_RUNNER_THREAD_POOL_NAME);
        if (cancelled) {
            task.stop();
@ -12,7 +12,8 @@ import org.elasticsearch.xpack.prelert.action.FlushJobAction;
import org.elasticsearch.xpack.prelert.action.PostDataAction;
import org.elasticsearch.xpack.prelert.job.DataCounts;
import org.elasticsearch.xpack.prelert.job.audit.Auditor;
-import org.elasticsearch.xpack.prelert.job.extraction.DataExtractor;
+import org.elasticsearch.xpack.prelert.scheduler.extractor.DataExtractor;
+import org.elasticsearch.xpack.prelert.scheduler.extractor.DataExtractorFactory;
import org.junit.Before;
import org.mockito.ArgumentCaptor;

@ -25,6 +26,7 @@ import java.util.Optional;
import java.util.function.Supplier;

import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyLong;
import static org.mockito.Matchers.eq;
import static org.mockito.Matchers.same;
import static org.mockito.Mockito.mock;
@ -35,6 +37,7 @@ import static org.mockito.Mockito.when;
public class ScheduledJobTests extends ESTestCase {

    private Auditor auditor;
+   private DataExtractorFactory dataExtractorFactory;
    private DataExtractor dataExtractor;
    private Client client;
    private ActionFuture<FlushJobAction.Response> flushJobFuture;
@ -45,7 +48,9 @@ public class ScheduledJobTests extends ESTestCase {
    @SuppressWarnings("unchecked")
    public void setup() throws Exception {
        auditor = mock(Auditor.class);
+       dataExtractorFactory = mock(DataExtractorFactory.class);
        dataExtractor = mock(DataExtractor.class);
+       when(dataExtractorFactory.newExtractor(anyLong(), anyLong())).thenReturn(dataExtractor);
        client = mock(Client.class);
        ActionFuture<PostDataAction.Response> jobDataFuture = mock(ActionFuture.class);
        flushJobFuture = mock(ActionFuture.class);
@ -64,7 +69,7 @@ public class ScheduledJobTests extends ESTestCase {
        ScheduledJob scheduledJob = createScheduledJob(1000, 500, -1, -1);
        assertNull(scheduledJob.runLookBack(0L, 1000L));

-       verify(dataExtractor).newSearch(eq(0L), eq(1000L), any());
+       verify(dataExtractorFactory).newExtractor(0L, 1000L);
        FlushJobAction.Request flushRequest = new FlushJobAction.Request("_job_id");
        flushRequest.setCalcInterim(true);
        verify(client).execute(same(FlushJobAction.INSTANCE), eq(flushRequest));
@ -78,7 +83,7 @@ public class ScheduledJobTests extends ESTestCase {
        long next = scheduledJob.runLookBack(0L, null);
        assertEquals(2000 + frequencyMs + 100, next);

-       verify(dataExtractor).newSearch(eq(0L), eq(1500L), any());
+       verify(dataExtractorFactory).newExtractor(0L, 1500L);
        FlushJobAction.Request flushRequest = new FlushJobAction.Request("_job_id");
        flushRequest.setCalcInterim(true);
        verify(client).execute(same(FlushJobAction.INSTANCE), eq(flushRequest));
@ -100,7 +105,7 @@ public class ScheduledJobTests extends ESTestCase {
        long next = scheduledJob.runLookBack(0L, null);
        assertEquals(10000 + frequencyMs + 100, next);

-       verify(dataExtractor).newSearch(eq(5000 + 1L), eq(currentTime - queryDelayMs), any());
+       verify(dataExtractorFactory).newExtractor(5000 + 1L, currentTime - queryDelayMs);
        FlushJobAction.Request flushRequest = new FlushJobAction.Request("_job_id");
        flushRequest.setCalcInterim(true);
        verify(client).execute(same(FlushJobAction.INSTANCE), eq(flushRequest));
@ -114,7 +119,7 @@ public class ScheduledJobTests extends ESTestCase {
        long next = scheduledJob.runRealtime();
        assertEquals(currentTime + frequencyMs + 100, next);

-       verify(dataExtractor).newSearch(eq(1000L + 1L), eq(currentTime - queryDelayMs), any());
+       verify(dataExtractorFactory).newExtractor(1000L + 1L, currentTime - queryDelayMs);
        FlushJobAction.Request flushRequest = new FlushJobAction.Request("_job_id");
        flushRequest.setCalcInterim(true);
        flushRequest.setAdvanceTime("1000");
@ -122,7 +127,6 @@ public class ScheduledJobTests extends ESTestCase {
    }

    public void testEmptyDataCount() throws Exception {
-       dataExtractor = mock(DataExtractor.class);
        when(dataExtractor.hasNext()).thenReturn(false);

        ScheduledJob scheduledJob = createScheduledJob(1000, 500, -1, -1);
@ -130,7 +134,6 @@ public class ScheduledJobTests extends ESTestCase {
    }

    public void testExtractionProblem() throws Exception {
-       dataExtractor = mock(DataExtractor.class);
        when(dataExtractor.hasNext()).thenReturn(true);
        when(dataExtractor.next()).thenThrow(new IOException());

@ -142,7 +145,7 @@ public class ScheduledJobTests extends ESTestCase {

        ArgumentCaptor<Long> startTimeCaptor = ArgumentCaptor.forClass(Long.class);
        ArgumentCaptor<Long> endTimeCaptor = ArgumentCaptor.forClass(Long.class);
-       verify(dataExtractor, times(2)).newSearch(startTimeCaptor.capture(), endTimeCaptor.capture(), any());
+       verify(dataExtractorFactory, times(2)).newExtractor(startTimeCaptor.capture(), endTimeCaptor.capture());
        assertEquals(0L, startTimeCaptor.getAllValues().get(0).longValue());
        assertEquals(1000L, startTimeCaptor.getAllValues().get(1).longValue());
        assertEquals(1000L, endTimeCaptor.getAllValues().get(0).longValue());
@ -162,7 +165,7 @@ public class ScheduledJobTests extends ESTestCase {

        ArgumentCaptor<Long> startTimeCaptor = ArgumentCaptor.forClass(Long.class);
        ArgumentCaptor<Long> endTimeCaptor = ArgumentCaptor.forClass(Long.class);
-       verify(dataExtractor, times(2)).newSearch(startTimeCaptor.capture(), endTimeCaptor.capture(), any());
+       verify(dataExtractorFactory, times(2)).newExtractor(startTimeCaptor.capture(), endTimeCaptor.capture());
        assertEquals(0L, startTimeCaptor.getAllValues().get(0).longValue());
        assertEquals(1000L, startTimeCaptor.getAllValues().get(1).longValue());
        assertEquals(1000L, endTimeCaptor.getAllValues().get(0).longValue());
@ -173,7 +176,7 @@ public class ScheduledJobTests extends ESTestCase {
|
||||||
private ScheduledJob createScheduledJob(long frequencyMs, long queryDelayMs, long latestFinalBucketEndTimeMs,
|
private ScheduledJob createScheduledJob(long frequencyMs, long queryDelayMs, long latestFinalBucketEndTimeMs,
|
||||||
long latestRecordTimeMs) {
|
long latestRecordTimeMs) {
|
||||||
Supplier<Long> currentTimeSupplier = () -> currentTime;
|
Supplier<Long> currentTimeSupplier = () -> currentTime;
|
||||||
return new ScheduledJob("_job_id", frequencyMs, queryDelayMs, dataExtractor, client, auditor,
|
return new ScheduledJob("_job_id", frequencyMs, queryDelayMs, dataExtractorFactory, client, auditor,
|
||||||
currentTimeSupplier, latestFinalBucketEndTimeMs, latestRecordTimeMs);
|
currentTimeSupplier, latestFinalBucketEndTimeMs, latestRecordTimeMs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
/*
|
||||||
|
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||||
|
* or more contributor license agreements. Licensed under the Elastic License;
|
||||||
|
* you may not use this file except in compliance with the Elastic License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.xpack.prelert.scheduler.extractor;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.bytes.BytesArray;
|
||||||
|
import org.elasticsearch.search.SearchHitField;
|
||||||
|
import org.elasticsearch.search.internal.InternalSearchHit;
|
||||||
|
import org.elasticsearch.search.internal.InternalSearchHitField;
|
||||||
|
import org.elasticsearch.test.ESTestCase;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
|
||||||
|
public class SearchHitFieldExtractorTests extends ESTestCase {
|
||||||
|
|
||||||
|
public void testExtractTimeFieldGivenHitContainsNothing() throws IOException {
|
||||||
|
InternalSearchHit searchHit = new InternalSearchHit(42);
|
||||||
|
|
||||||
|
expectThrows(RuntimeException.class, () -> SearchHitFieldExtractor.extractTimeField(searchHit, "time"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExtractTimeFieldGivenSingleValueInFields() throws IOException {
|
||||||
|
InternalSearchHit searchHit = new InternalSearchHit(42);
|
||||||
|
Map<String, SearchHitField> fields = new HashMap<>();
|
||||||
|
fields.put("time", new InternalSearchHitField("time", Arrays.asList(3L)));
|
||||||
|
searchHit.fields(fields);
|
||||||
|
|
||||||
|
assertThat(SearchHitFieldExtractor.extractTimeField(searchHit, "time"), equalTo(3L));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExtractTimeFieldGivenSingleValueInSource() throws IOException {
|
||||||
|
InternalSearchHit searchHit = new InternalSearchHit(42);
|
||||||
|
searchHit.sourceRef(new BytesArray("{\"time\":1482418307000}"));
|
||||||
|
|
||||||
|
assertThat(SearchHitFieldExtractor.extractTimeField(searchHit, "time"), equalTo(1482418307000L));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExtractTimeFieldGivenArrayValue() throws IOException {
|
||||||
|
InternalSearchHit searchHit = new InternalSearchHit(42);
|
||||||
|
Map<String, SearchHitField> fields = new HashMap<>();
|
||||||
|
fields.put("time", new InternalSearchHitField("time", Arrays.asList(3L, 5L)));
|
||||||
|
searchHit.fields(fields);
|
||||||
|
|
||||||
|
expectThrows(RuntimeException.class, () -> SearchHitFieldExtractor.extractTimeField(searchHit, "time"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExtractTimeFieldGivenSingleNonLongValue() throws IOException {
|
||||||
|
InternalSearchHit searchHit = new InternalSearchHit(42);
|
||||||
|
Map<String, SearchHitField> fields = new HashMap<>();
|
||||||
|
fields.put("time", new InternalSearchHitField("time", Arrays.asList(3)));
|
||||||
|
searchHit.fields(fields);
|
||||||
|
|
||||||
|
expectThrows(RuntimeException.class, () -> SearchHitFieldExtractor.extractTimeField(searchHit, "time"));
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,122 @@
|
||||||
|
/*
|
||||||
|
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||||
|
* or more contributor license agreements. Licensed under the Elastic License;
|
||||||
|
* you may not use this file except in compliance with the Elastic License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.xpack.prelert.scheduler.extractor;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.bytes.BytesArray;
|
||||||
|
import org.elasticsearch.search.SearchHit;
|
||||||
|
import org.elasticsearch.search.SearchHitField;
|
||||||
|
import org.elasticsearch.search.internal.InternalSearchHit;
|
||||||
|
import org.elasticsearch.search.internal.InternalSearchHitField;
|
||||||
|
import org.elasticsearch.test.ESTestCase;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
|
||||||
|
public class SearchHitToJsonProcessorTests extends ESTestCase {
|
||||||
|
|
||||||
|
public void testProcessGivenHitContainsNothing() throws IOException {
|
||||||
|
InternalSearchHit searchHit = new InternalSearchHit(42);
|
||||||
|
|
||||||
|
String json = searchHitToString(new String[] {"field_1", "field_2"}, searchHit);
|
||||||
|
|
||||||
|
assertThat(json, equalTo("{}"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testProcessGivenHitContainsEmptySource() throws IOException {
|
||||||
|
InternalSearchHit searchHit = new InternalSearchHit(42);
|
||||||
|
searchHit.sourceRef(new BytesArray("{}"));
|
||||||
|
|
||||||
|
String json = searchHitToString(new String[] {"field_1", "field_2"}, searchHit);
|
||||||
|
|
||||||
|
assertThat(json, equalTo("{}"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testProcessGivenHitContainsSingleValueInFields() throws IOException {
|
||||||
|
InternalSearchHit searchHit = new InternalSearchHit(42);
|
||||||
|
Map<String, SearchHitField> fields = new HashMap<>();
|
||||||
|
fields.put("field_1", new InternalSearchHitField("field_1", Arrays.asList(3)));
|
||||||
|
searchHit.fields(fields);
|
||||||
|
|
||||||
|
String json = searchHitToString(new String[] {"field_1"}, searchHit);
|
||||||
|
|
||||||
|
assertThat(json, equalTo("{\"field_1\":3}"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testProcessGivenHitContainsArrayValueInFields() throws IOException {
|
||||||
|
InternalSearchHit searchHit = new InternalSearchHit(42);
|
||||||
|
Map<String, SearchHitField> fields = new HashMap<>();
|
||||||
|
fields.put("field_1", new InternalSearchHitField("field_1", Arrays.asList(3, 9)));
|
||||||
|
searchHit.fields(fields);
|
||||||
|
|
||||||
|
String json = searchHitToString(new String[] {"field_1"}, searchHit);
|
||||||
|
|
||||||
|
assertThat(json, equalTo("{\"field_1\":[3,9]}"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testProcessGivenHitContainsSingleValueInSource() throws IOException {
|
||||||
|
InternalSearchHit searchHit = new InternalSearchHit(42);
|
||||||
|
String hitSource = "{\"field_1\":\"foo\"}";
|
||||||
|
searchHit.sourceRef(new BytesArray(hitSource));
|
||||||
|
|
||||||
|
String json = searchHitToString(new String[] {"field_1"}, searchHit);
|
||||||
|
|
||||||
|
assertThat(json, equalTo("{\"field_1\":\"foo\"}"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testProcessGivenHitContainsArrayValueInSource() throws IOException {
|
||||||
|
InternalSearchHit searchHit = new InternalSearchHit(42);
|
||||||
|
String hitSource = "{\"field_1\":[\"foo\",\"bar\"]}";
|
||||||
|
searchHit.sourceRef(new BytesArray(hitSource));
|
||||||
|
|
||||||
|
String json = searchHitToString(new String[] {"field_1"}, searchHit);
|
||||||
|
|
||||||
|
assertThat(json, equalTo("{\"field_1\":[\"foo\",\"bar\"]}"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testProcessGivenHitContainsFieldsAndSource() throws IOException {
|
||||||
|
InternalSearchHit searchHit = new InternalSearchHit(42);
|
||||||
|
String hitSource = "{\"field_1\":\"foo\"}";
|
||||||
|
searchHit.sourceRef(new BytesArray(hitSource));
|
||||||
|
Map<String, SearchHitField> fields = new HashMap<>();
|
||||||
|
fields.put("field_2", new InternalSearchHitField("field_2", Arrays.asList("bar")));
|
||||||
|
searchHit.fields(fields);
|
||||||
|
|
||||||
|
String json = searchHitToString(new String[] {"field_1", "field_2"}, searchHit);
|
||||||
|
|
||||||
|
assertThat(json, equalTo("{\"field_1\":\"foo\",\"field_2\":\"bar\"}"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testProcessGivenMultipleHits() throws IOException {
|
||||||
|
InternalSearchHit searchHit1 = new InternalSearchHit(42);
|
||||||
|
Map<String, SearchHitField> fields = new HashMap<>();
|
||||||
|
fields.put("field_1", new InternalSearchHitField("field_1", Arrays.asList(3)));
|
||||||
|
searchHit1.fields(fields);
|
||||||
|
InternalSearchHit searchHit2 = new InternalSearchHit(42);
|
||||||
|
fields = new HashMap<>();
|
||||||
|
fields.put("field_1", new InternalSearchHitField("field_1", Arrays.asList(5)));
|
||||||
|
searchHit2.fields(fields);
|
||||||
|
|
||||||
|
String json = searchHitToString(new String[] {"field_1"}, searchHit1, searchHit2);
|
||||||
|
|
||||||
|
assertThat(json, equalTo("{\"field_1\":3} {\"field_1\":5}"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private String searchHitToString(String[] fields, SearchHit... searchHits) throws IOException {
|
||||||
|
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||||
|
try (SearchHitToJsonProcessor hitProcessor = new SearchHitToJsonProcessor(fields, outputStream)) {
|
||||||
|
for (int i = 0; i < searchHits.length; i++) {
|
||||||
|
hitProcessor.process(searchHits[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return outputStream.toString(StandardCharsets.UTF_8.name());
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,295 @@
|
||||||
|
/*
|
||||||
|
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||||
|
* or more contributor license agreements. Licensed under the Elastic License;
|
||||||
|
* you may not use this file except in compliance with the Elastic License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.xpack.prelert.scheduler.extractor.scroll;
|
||||||
|
|
||||||
|
import org.elasticsearch.action.search.SearchRequestBuilder;
|
||||||
|
import org.elasticsearch.action.search.SearchResponse;
|
||||||
|
import org.elasticsearch.client.Client;
|
||||||
|
import org.elasticsearch.index.query.QueryBuilder;
|
||||||
|
import org.elasticsearch.index.query.QueryBuilders;
|
||||||
|
import org.elasticsearch.rest.RestStatus;
|
||||||
|
import org.elasticsearch.search.SearchHit;
|
||||||
|
import org.elasticsearch.search.SearchHitField;
|
||||||
|
import org.elasticsearch.search.SearchHits;
|
||||||
|
import org.elasticsearch.search.aggregations.AggregatorFactories;
|
||||||
|
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||||
|
import org.elasticsearch.search.internal.InternalSearchHit;
|
||||||
|
import org.elasticsearch.search.internal.InternalSearchHitField;
|
||||||
|
import org.elasticsearch.test.ESTestCase;
|
||||||
|
import org.junit.Before;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import static org.hamcrest.Matchers.containsString;
|
||||||
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
import static org.hamcrest.Matchers.is;
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
public class ScrollDataExtractorTests extends ESTestCase {
|
||||||
|
|
||||||
|
private Client client;
|
||||||
|
private List<SearchRequestBuilder> capturedSearchRequests;
|
||||||
|
private List<String> capturedContinueScrollIds;
|
||||||
|
private List<String> capturedClearScrollIds;
|
||||||
|
private String jobId;
|
||||||
|
private List<String> jobFields;
|
||||||
|
private String timeField;
|
||||||
|
private List<String> types;
|
||||||
|
private List<String> indexes;
|
||||||
|
private QueryBuilder query;
|
||||||
|
private AggregatorFactories.Builder aggregations;
|
||||||
|
private List<SearchSourceBuilder.ScriptField> scriptFields;
|
||||||
|
private int scrollSize;
|
||||||
|
|
||||||
|
private class TestDataExtractor extends ScrollDataExtractor {
|
||||||
|
|
||||||
|
private SearchResponse nextResponse;
|
||||||
|
|
||||||
|
public TestDataExtractor(long start, long end) {
|
||||||
|
super(client, createContext(start, end));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected SearchResponse executeSearchRequest(SearchRequestBuilder searchRequestBuilder) {
|
||||||
|
capturedSearchRequests.add(searchRequestBuilder);
|
||||||
|
return nextResponse;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected SearchResponse executeSearchScrollRequest(String scrollId) {
|
||||||
|
capturedContinueScrollIds.add(scrollId);
|
||||||
|
return nextResponse;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
void clearScroll(String scrollId) {
|
||||||
|
capturedClearScrollIds.add(scrollId);
|
||||||
|
}
|
||||||
|
|
||||||
|
void setNextResponse(SearchResponse searchResponse) {
|
||||||
|
nextResponse = searchResponse;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUpTests() {
|
||||||
|
client = mock(Client.class);
|
||||||
|
capturedSearchRequests = new ArrayList<>();
|
||||||
|
capturedContinueScrollIds = new ArrayList<>();
|
||||||
|
capturedClearScrollIds = new ArrayList<>();
|
||||||
|
timeField = "time";
|
||||||
|
jobId = "test-job";
|
||||||
|
jobFields = Arrays.asList(timeField, "field_1");
|
||||||
|
indexes = Arrays.asList("index-1", "index-2");
|
||||||
|
types = Arrays.asList("type-1", "type-2");
|
||||||
|
query = QueryBuilders.matchAllQuery();
|
||||||
|
aggregations = null;
|
||||||
|
scriptFields = Collections.emptyList();
|
||||||
|
scrollSize = 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSinglePageExtraction() throws IOException {
|
||||||
|
TestDataExtractor extractor = new TestDataExtractor(1000L, 2000L);
|
||||||
|
|
||||||
|
SearchResponse response = createSearchResponse(
|
||||||
|
Arrays.asList(1100L, 1200L),
|
||||||
|
Arrays.asList("a1", "a2"),
|
||||||
|
Arrays.asList("b1", "b2")
|
||||||
|
);
|
||||||
|
extractor.setNextResponse(response);
|
||||||
|
|
||||||
|
assertThat(extractor.hasNext(), is(true));
|
||||||
|
Optional<InputStream> stream = extractor.next();
|
||||||
|
assertThat(stream.isPresent(), is(true));
|
||||||
|
String expectedStream = "{\"time\":1100,\"field_1\":\"a1\"} {\"time\":1200,\"field_1\":\"a2\"}";
|
||||||
|
assertThat(asString(stream.get()), equalTo(expectedStream));
|
||||||
|
|
||||||
|
extractor.setNextResponse(createEmptySearchResponse());
|
||||||
|
assertThat(extractor.hasNext(), is(true));
|
||||||
|
assertThat(extractor.next().isPresent(), is(false));
|
||||||
|
assertThat(extractor.hasNext(), is(false));
|
||||||
|
assertThat(capturedSearchRequests.size(), equalTo(1));
|
||||||
|
|
||||||
|
String searchRequest = capturedSearchRequests.get(0).toString().replaceAll("\\s", "");
|
||||||
|
assertThat(searchRequest, containsString("\"size\":1000"));
|
||||||
|
assertThat(searchRequest, containsString("\"query\":{\"bool\":{\"filter\":[{\"match_all\":{\"boost\":1.0}}," +
|
||||||
|
"{\"range\":{\"time\":{\"from\":1000,\"to\":2000,\"include_lower\":true,\"include_upper\":false," +
|
||||||
|
"\"format\":\"epoch_millis\",\"boost\":1.0}}}]"));
|
||||||
|
assertThat(searchRequest, containsString("\"sort\":[{\"time\":{\"order\":\"asc\"}}]"));
|
||||||
|
|
||||||
|
assertThat(capturedContinueScrollIds.size(), equalTo(1));
|
||||||
|
assertThat(capturedContinueScrollIds.get(0), equalTo(response.getScrollId()));
|
||||||
|
|
||||||
|
assertThat(capturedClearScrollIds.isEmpty(), is(true));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testMultiplePageExtraction() throws IOException {
|
||||||
|
TestDataExtractor extractor = new TestDataExtractor(1000L, 10000L);
|
||||||
|
|
||||||
|
SearchResponse response1 = createSearchResponse(
|
||||||
|
Arrays.asList(1000L, 2000L),
|
||||||
|
Arrays.asList("a1", "a2"),
|
||||||
|
Arrays.asList("b1", "b2")
|
||||||
|
);
|
||||||
|
extractor.setNextResponse(response1);
|
||||||
|
|
||||||
|
assertThat(extractor.hasNext(), is(true));
|
||||||
|
Optional<InputStream> stream = extractor.next();
|
||||||
|
assertThat(stream.isPresent(), is(true));
|
||||||
|
String expectedStream = "{\"time\":1000,\"field_1\":\"a1\"} {\"time\":2000,\"field_1\":\"a2\"}";
|
||||||
|
assertThat(asString(stream.get()), equalTo(expectedStream));
|
||||||
|
|
||||||
|
SearchResponse response2 = createSearchResponse(
|
||||||
|
Arrays.asList(3000L, 4000L),
|
||||||
|
Arrays.asList("a3", "a4"),
|
||||||
|
Arrays.asList("b3", "b4")
|
||||||
|
);
|
||||||
|
extractor.setNextResponse(response2);
|
||||||
|
|
||||||
|
assertThat(extractor.hasNext(), is(true));
|
||||||
|
stream = extractor.next();
|
||||||
|
assertThat(stream.isPresent(), is(true));
|
||||||
|
expectedStream = "{\"time\":3000,\"field_1\":\"a3\"} {\"time\":4000,\"field_1\":\"a4\"}";
|
||||||
|
assertThat(asString(stream.get()), equalTo(expectedStream));
|
||||||
|
|
||||||
|
extractor.setNextResponse(createEmptySearchResponse());
|
||||||
|
assertThat(extractor.hasNext(), is(true));
|
||||||
|
assertThat(extractor.next().isPresent(), is(false));
|
||||||
|
assertThat(extractor.hasNext(), is(false));
|
||||||
|
assertThat(capturedSearchRequests.size(), equalTo(1));
|
||||||
|
|
||||||
|
String searchRequest1 = capturedSearchRequests.get(0).toString().replaceAll("\\s", "");
|
||||||
|
assertThat(searchRequest1, containsString("\"size\":1000"));
|
||||||
|
assertThat(searchRequest1, containsString("\"query\":{\"bool\":{\"filter\":[{\"match_all\":{\"boost\":1.0}}," +
|
||||||
|
"{\"range\":{\"time\":{\"from\":1000,\"to\":10000,\"include_lower\":true,\"include_upper\":false," +
|
||||||
|
"\"format\":\"epoch_millis\",\"boost\":1.0}}}]"));
|
||||||
|
assertThat(searchRequest1, containsString("\"sort\":[{\"time\":{\"order\":\"asc\"}}]"));
|
||||||
|
|
||||||
|
assertThat(capturedContinueScrollIds.size(), equalTo(2));
|
||||||
|
assertThat(capturedContinueScrollIds.get(0), equalTo(response1.getScrollId()));
|
||||||
|
assertThat(capturedContinueScrollIds.get(1), equalTo(response2.getScrollId()));
|
||||||
|
|
||||||
|
assertThat(capturedClearScrollIds.isEmpty(), is(true));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testMultiplePageExtractionGivenCancel() throws IOException {
|
||||||
|
TestDataExtractor extractor = new TestDataExtractor(1000L, 10000L);
|
||||||
|
|
||||||
|
SearchResponse response1 = createSearchResponse(
|
||||||
|
Arrays.asList(1000L, 2000L),
|
||||||
|
Arrays.asList("a1", "a2"),
|
||||||
|
Arrays.asList("b1", "b2")
|
||||||
|
);
|
||||||
|
extractor.setNextResponse(response1);
|
||||||
|
|
||||||
|
assertThat(extractor.hasNext(), is(true));
|
||||||
|
Optional<InputStream> stream = extractor.next();
|
||||||
|
assertThat(stream.isPresent(), is(true));
|
||||||
|
String expectedStream = "{\"time\":1000,\"field_1\":\"a1\"} {\"time\":2000,\"field_1\":\"a2\"}";
|
||||||
|
assertThat(asString(stream.get()), equalTo(expectedStream));
|
||||||
|
|
||||||
|
extractor.cancel();
|
||||||
|
|
||||||
|
SearchResponse response2 = createSearchResponse(
|
||||||
|
Arrays.asList(2000L, 3000L),
|
||||||
|
Arrays.asList("a3", "a4"),
|
||||||
|
Arrays.asList("b3", "b4")
|
||||||
|
);
|
||||||
|
extractor.setNextResponse(response2);
|
||||||
|
|
||||||
|
assertThat(extractor.hasNext(), is(true));
|
||||||
|
stream = extractor.next();
|
||||||
|
assertThat(stream.isPresent(), is(true));
|
||||||
|
expectedStream = "{\"time\":2000,\"field_1\":\"a3\"}";
|
||||||
|
assertThat(asString(stream.get()), equalTo(expectedStream));
|
||||||
|
assertThat(extractor.hasNext(), is(false));
|
||||||
|
|
||||||
|
assertThat(capturedClearScrollIds.size(), equalTo(1));
|
||||||
|
assertThat(capturedClearScrollIds.get(0), equalTo(response2.getScrollId()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExtractionGivenInitSearchResponseHasError() throws IOException {
|
||||||
|
TestDataExtractor extractor = new TestDataExtractor(1000L, 2000L);
|
||||||
|
extractor.setNextResponse(createErrorResponse());
|
||||||
|
|
||||||
|
assertThat(extractor.hasNext(), is(true));
|
||||||
|
expectThrows(IOException.class, () -> extractor.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExtractionGivenContinueScrollResponseHasError() throws IOException {
|
||||||
|
TestDataExtractor extractor = new TestDataExtractor(1000L, 10000L);
|
||||||
|
|
||||||
|
SearchResponse response1 = createSearchResponse(
|
||||||
|
Arrays.asList(1000L, 2000L),
|
||||||
|
Arrays.asList("a1", "a2"),
|
||||||
|
Arrays.asList("b1", "b2")
|
||||||
|
);
|
||||||
|
extractor.setNextResponse(response1);
|
||||||
|
|
||||||
|
assertThat(extractor.hasNext(), is(true));
|
||||||
|
Optional<InputStream> stream = extractor.next();
|
||||||
|
assertThat(stream.isPresent(), is(true));
|
||||||
|
|
||||||
|
extractor.setNextResponse(createErrorResponse());
|
||||||
|
assertThat(extractor.hasNext(), is(true));
|
||||||
|
expectThrows(IOException.class, () -> extractor.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
private ScrollDataExtractorContext createContext(long start, long end) {
|
||||||
|
return new ScrollDataExtractorContext(jobId, jobFields, timeField, indexes, types, query, aggregations, scriptFields, scrollSize,
|
||||||
|
start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
private SearchResponse createEmptySearchResponse() {
|
||||||
|
return createSearchResponse(Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
|
||||||
|
}
|
||||||
|
|
||||||
|
private SearchResponse createSearchResponse(List<Long> timestamps, List<String> field1Values, List<String> field2Values) {
|
||||||
|
SearchResponse searchResponse = mock(SearchResponse.class);
|
||||||
|
when(searchResponse.status()).thenReturn(RestStatus.OK);
|
||||||
|
when(searchResponse.getScrollId()).thenReturn(randomAsciiOfLength(1000));
|
||||||
|
SearchHits searchHits = mock(SearchHits.class);
|
||||||
|
List<SearchHit> hits = new ArrayList<>();
|
||||||
|
for (int i = 0; i < timestamps.size(); i++) {
|
||||||
|
InternalSearchHit hit = new InternalSearchHit(randomInt());
|
||||||
|
Map<String, SearchHitField> fields = new HashMap<>();
|
||||||
|
fields.put(timeField, new InternalSearchHitField("time", Arrays.asList(timestamps.get(i))));
|
||||||
|
fields.put("field_1", new InternalSearchHitField("field_1", Arrays.asList(field1Values.get(i))));
|
||||||
|
fields.put("field_2", new InternalSearchHitField("field_2", Arrays.asList(field2Values.get(i))));
|
||||||
|
hit.fields(fields);
|
||||||
|
hits.add(hit);
|
||||||
|
}
|
||||||
|
when(searchHits.getHits()).thenReturn(hits.toArray(new SearchHit[hits.size()]));
|
||||||
|
when(searchHits.hits()).thenReturn(hits.toArray(new SearchHit[hits.size()]));
|
||||||
|
when(searchResponse.getHits()).thenReturn(searchHits);
|
||||||
|
return searchResponse;
|
||||||
|
}
|
||||||
|
|
||||||
|
private SearchResponse createErrorResponse() {
|
||||||
|
SearchResponse searchResponse = mock(SearchResponse.class);
|
||||||
|
when(searchResponse.status()).thenReturn(RestStatus.INTERNAL_SERVER_ERROR);
|
||||||
|
return searchResponse;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String asString(InputStream inputStream) throws IOException {
|
||||||
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
|
||||||
|
return reader.lines().collect(Collectors.joining("\n"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,693 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
|
||||||
* or more contributor license agreements. Licensed under the Elastic License;
|
|
||||||
* you may not use this file except in compliance with the Elastic License.
|
|
||||||
*/
|
|
||||||
package org.elasticsearch.xpack.prelert.scheduler.http;
|
|
||||||
|
|
||||||
import org.apache.logging.log4j.Logger;
|
|
||||||
import org.elasticsearch.test.ESTestCase;
|
|
||||||
import org.junit.Before;
|
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.NoSuchElementException;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import static org.mockito.Mockito.mock;
|
|
||||||
|
|
||||||
public class ElasticsearchDataExtractorTests extends ESTestCase {
|
|
||||||
|
|
||||||
private static final List<String> INDEXES = Arrays.asList("index-*");
|
|
||||||
private static final List<String> TYPES = Arrays.asList("dataType");
|
|
||||||
private static final String SEARCH = "{\"match_all\":{}}";
|
|
||||||
private static final String TIME_FIELD = "time";
|
|
||||||
private static final String CLEAR_SCROLL_RESPONSE = "{}";
|
|
||||||
|
|
||||||
private Logger jobLogger;
|
|
||||||
|
|
||||||
private String aggregations;
|
|
||||||
private String scriptFields;
|
|
||||||
|
|
||||||
private ElasticsearchDataExtractor extractor;
|
|
||||||
|
|
||||||
@Before
|
|
||||||
public void setUpTests() throws IOException {
|
|
||||||
jobLogger = mock(Logger.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testDataExtraction() throws IOException {
|
|
||||||
String initialResponse = "{" + "\"_scroll_id\":\"c2Nhbjs2OzM0NDg1ODpzRlBLc0FXNlNyNm5JWUc1\"," + "\"took\":17,"
|
|
||||||
+ "\"timed_out\":false," + "\"_shards\":{" + " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "},"
|
|
||||||
+ "\"hits\":{" + " \"total\":1437," + " \"max_score\":null," + " \"hits\":[" + " \"_index\":\"dataIndex\","
|
|
||||||
+ " \"_type\":\"dataType\"," + " \"_id\":\"1403481600\"," + " \"_score\":null," + " \"_source\":{"
|
|
||||||
+ " \"id\":\"1403481600\"" + " }" + " ]" + "}" + "}";
|
|
||||||
|
|
||||||
String scrollResponse = "{" + "\"_scroll_id\":\"secondScrollId\"," + "\"took\":8," + "\"timed_out\":false," + "\"_shards\":{"
|
|
||||||
+ " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":1437,"
|
|
||||||
+ " \"max_score\":null," + " \"hits\":[" + " \"_index\":\"dataIndex\"," + " \"_type\":\"dataType\","
|
|
||||||
+ " \"_id\":\"1403782200\"," + " \"_score\":null," + " \"_source\":{" + " \"id\":\"1403782200\"" + " }"
|
|
||||||
+ " ]" + "}" + "}";
|
|
||||||
|
|
||||||
String scrollEndResponse = "{" + "\"_scroll_id\":\"thirdScrollId\"," + "\"took\":8," + "\"timed_out\":false," + "\"_shards\":{"
|
|
||||||
+ " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":1437,"
|
|
||||||
+ " \"max_score\":null," + " \"hits\":[]" + "}" + "}";
|
|
||||||
|
|
||||||
List<HttpResponse> responses = Arrays.asList(new HttpResponse(toStream(initialResponse), 200),
|
|
||||||
new HttpResponse(toStream(scrollResponse), 200), new HttpResponse(toStream(scrollEndResponse), 200));
|
|
||||||
|
|
||||||
MockHttpRequester requester = new MockHttpRequester(responses);
|
|
||||||
createExtractor(requester);
|
|
||||||
|
|
||||||
extractor.newSearch(1400000000L, 1403600000L, jobLogger);
|
|
||||||
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
assertEquals(initialResponse, streamToString(extractor.next().get()));
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
assertEquals(scrollResponse, streamToString(extractor.next().get()));
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
assertFalse(extractor.next().isPresent());
|
|
||||||
assertFalse(extractor.hasNext());
|
|
||||||
|
|
||||||
requester.assertEqualRequestsToResponses();
|
|
||||||
requester.assertResponsesHaveBeenConsumed();
|
|
||||||
|
|
||||||
RequestParams firstRequestParams = requester.getGetRequestParams(0);
|
|
||||||
assertEquals("http://localhost:9200/index-*/dataType/_search?scroll=60m&size=1000", firstRequestParams.url);
|
|
||||||
String expectedSearchBody = "{" + " \"sort\": [" + " {\"time\": {\"order\": \"asc\"}}" + " ]," + " \"query\": {"
|
|
||||||
+ " \"bool\": {" + " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {"
|
|
||||||
+ " \"time\": {" + " \"gte\": \"1970-01-17T04:53:20.000Z\","
|
|
||||||
+ " \"lt\": \"1970-01-17T05:53:20.000Z\"," + " \"format\": \"date_time\"" + " }"
|
|
||||||
+ " }" + " }" + " ]" + " }" + " }" + "}";
|
|
||||||
assertEquals(expectedSearchBody.replaceAll(" ", ""), firstRequestParams.requestBody.replaceAll(" ", ""));
|
|
||||||
|
|
||||||
RequestParams secondRequestParams = requester.getGetRequestParams(1);
|
|
||||||
assertEquals("http://localhost:9200/_search/scroll?scroll=60m", secondRequestParams.url);
|
|
||||||
assertEquals("c2Nhbjs2OzM0NDg1ODpzRlBLc0FXNlNyNm5JWUc1", secondRequestParams.requestBody);
|
|
||||||
|
|
||||||
RequestParams thirdRequestParams = requester.getGetRequestParams(2);
|
|
||||||
assertEquals("http://localhost:9200/_search/scroll?scroll=60m", thirdRequestParams.url);
|
|
||||||
assertEquals("secondScrollId", thirdRequestParams.requestBody);
|
|
||||||
|
|
||||||
assertEquals("http://localhost:9200/_search/scroll", requester.getDeleteRequestParams(0).url);
|
|
||||||
assertEquals("{\"scroll_id\":[\"thirdScrollId\"]}", requester.getDeleteRequestParams(0).requestBody);
|
|
||||||
assertEquals(1, requester.deleteRequestParams.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testDataExtraction_GivenInitialResponseContainsLongScrollId() throws IOException {
|
|
||||||
StringBuilder scrollId = new StringBuilder();
|
|
||||||
for (int i = 0; i < 300 * 1024; i++) {
|
|
||||||
scrollId.append("a");
|
|
||||||
}
|
|
||||||
|
|
||||||
String initialResponse = "{" + "\"_scroll_id\":\"" + scrollId + "\"," + "\"took\":17," + "\"timed_out\":false," + "\"_shards\":{"
|
|
||||||
+ " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":1437,"
|
|
||||||
+ " \"max_score\":null," + " \"hits\":[" + " \"_index\":\"dataIndex\"," + " \"_type\":\"dataType\","
|
|
||||||
+ " \"_id\":\"1403481600\"," + " \"_score\":null," + " \"_source\":{" + " \"id\":\"1403481600\"" + " }"
|
|
||||||
+ " ]" + "}" + "}";
|
|
||||||
|
|
||||||
String scrollEndResponse = "{" + "\"_scroll_id\":\"" + scrollId + "\"," + "\"took\":8," + "\"timed_out\":false," + "\"_shards\":{"
|
|
||||||
+ " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":1437,"
|
|
||||||
+ " \"max_score\":null," + " \"hits\":[]" + "}" + "}";
|
|
||||||
|
|
||||||
List<HttpResponse> responses = Arrays.asList(new HttpResponse(toStream(initialResponse), 200),
|
|
||||||
new HttpResponse(toStream(scrollEndResponse), 200));
|
|
||||||
MockHttpRequester requester = new MockHttpRequester(responses);
|
|
||||||
createExtractor(requester);
|
|
||||||
|
|
||||||
extractor.newSearch(1400000000L, 1403600000L, jobLogger);
|
|
||||||
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
extractor.next();
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
extractor.next();
|
|
||||||
assertFalse(extractor.hasNext());
|
|
||||||
|
|
||||||
assertEquals(scrollId.toString(), requester.getGetRequestParams(1).requestBody);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testDataExtraction_GivenInitialResponseContainsNoHitsAndNoScrollId() throws IOException {
|
|
||||||
String initialResponse = "{}";
|
|
||||||
HttpResponse httpGetResponse = new HttpResponse(toStream(initialResponse), 200);
|
|
||||||
List<HttpResponse> responses = Arrays.asList(httpGetResponse);
|
|
||||||
MockHttpRequester requester = new MockHttpRequester(responses);
|
|
||||||
createExtractor(requester);
|
|
||||||
|
|
||||||
extractor.newSearch(1400000000L, 1403600000L, jobLogger);
|
|
||||||
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
IOException e = expectThrows(IOException.class, () -> extractor.next());
|
|
||||||
assertEquals("Field '_scroll_id' was expected but not found in first 2 bytes of response:\n{}", e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testDataExtraction_GivenInitialResponseContainsHitsButNoScrollId() throws IOException {
|
|
||||||
String initialResponse = "{" + "\"took\":17," + "\"timed_out\":false," + "\"_shards\":{" + " \"total\":1," + " \"successful\":1,"
|
|
||||||
+ " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":1437," + " \"max_score\":null," + " \"hits\":["
|
|
||||||
+ " \"_index\":\"dataIndex\"," + " \"_type\":\"dataType\"," + " \"_id\":\"1403481600\"," + " \"_score\":null,"
|
|
||||||
+ " \"_source\":{" + " \"id\":\"1403481600\"" + " }" + " ]" + "}" + "}";
|
|
||||||
|
|
||||||
HttpResponse httpGetResponse = new HttpResponse(toStream(initialResponse), 200);
|
|
||||||
List<HttpResponse> responses = Arrays.asList(httpGetResponse);
|
|
||||||
MockHttpRequester requester = new MockHttpRequester(responses);
|
|
||||||
createExtractor(requester);
|
|
||||||
|
|
||||||
extractor.newSearch(1400000000L, 1403600000L, jobLogger);
|
|
||||||
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
IOException e = expectThrows(IOException.class, () -> extractor.next());
|
|
||||||
assertEquals("Field '_scroll_id' was expected but not found in first 272 bytes of response:\n" + initialResponse, e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testDataExtraction_GivenInitialResponseContainsTooLongScrollId() throws IOException {
|
|
||||||
StringBuilder scrollId = new StringBuilder();
|
|
||||||
for (int i = 0; i < 1024 * 1024; i++) {
|
|
||||||
scrollId.append("a");
|
|
||||||
}
|
|
||||||
|
|
||||||
String initialResponse = "{" + "\"_scroll_id\":\"" + scrollId + "\"," + "\"took\":17," + "\"timed_out\":false," + "\"_shards\":{"
|
|
||||||
+ " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":1437,"
|
|
||||||
+ " \"max_score\":null," + " \"hits\":[" + " \"_index\":\"dataIndex\"," + " \"_type\":\"dataType\","
|
|
||||||
+ " \"_id\":\"1403481600\"," + " \"_score\":null," + " \"_source\":{" + " \"id\":\"1403481600\"" + " }"
|
|
||||||
+ " ]" + "}" + "}";
|
|
||||||
|
|
||||||
HttpResponse httpGetResponse = new HttpResponse(toStream(initialResponse), 200);
|
|
||||||
List<HttpResponse> responses = Arrays.asList(httpGetResponse);
|
|
||||||
MockHttpRequester requester = new MockHttpRequester(responses);
|
|
||||||
createExtractor(requester);
|
|
||||||
|
|
||||||
extractor.newSearch(1400000000L, 1403600000L, jobLogger);
|
|
||||||
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
IOException e = expectThrows(IOException.class, () -> extractor.next());
|
|
||||||
assertEquals("Field '_scroll_id' was expected but not found in first 1048576 bytes of response:\n" + initialResponse,
|
|
||||||
e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testDataExtraction_GivenInitialResponseDoesNotReturnOk() throws IOException {
|
|
||||||
String initialResponse = "{}";
|
|
||||||
List<HttpResponse> responses = Arrays.asList(new HttpResponse(toStream(initialResponse), 500));
|
|
||||||
MockHttpRequester requester = new MockHttpRequester(responses);
|
|
||||||
createExtractor(requester);
|
|
||||||
|
|
||||||
extractor.newSearch(1400000000L, 1403600000L, jobLogger);
|
|
||||||
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
IOException e = expectThrows(IOException.class, () -> extractor.next());
|
|
||||||
assertEquals(
|
|
||||||
"Request 'http://localhost:9200/index-*/dataType/_search?scroll=60m&size=1000' failed with status code: 500."
|
|
||||||
+ " Response was:\n{}",
|
|
||||||
e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testDataExtraction_GivenScrollResponseDoesNotReturnOk() throws IOException {
|
|
||||||
String initialResponse = "{" + "\"_scroll_id\":\"c2Nhbjs2OzM0NDg1ODpzRlBLc0FXNlNyNm5JWUc1\"," + "\"hits\":[..." + "}";
|
|
||||||
List<HttpResponse> responses = Arrays.asList(new HttpResponse(toStream(initialResponse), 200),
|
|
||||||
new HttpResponse(toStream("{}"), 500));
|
|
||||||
MockHttpRequester requester = new MockHttpRequester(responses);
|
|
||||||
createExtractor(requester);
|
|
||||||
|
|
||||||
extractor.newSearch(1400000000L, 1403600000L, jobLogger);
|
|
||||||
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
assertEquals(initialResponse, streamToString(extractor.next().get()));
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
IOException e = expectThrows(IOException.class, () -> extractor.next());
|
|
||||||
assertEquals("Request 'http://localhost:9200/_search/scroll?scroll=60m' with scroll id 'c2Nhbjs2OzM0NDg1ODpzRlBLc0FXNlNyNm5JWUc1' "
|
|
||||||
+ "failed with status code: 500. Response was:\n{}", e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testNext_ThrowsGivenHasNotNext() throws IOException {
|
|
||||||
String initialResponse = "{" + "\"_scroll_id\":\"c2Nhbjs2OzM0NDg1ODpzRlBLc0FXNlNyNm5JWUc1\"," + "\"hits\":[]" + "}";
|
|
||||||
List<HttpResponse> responses = Arrays.asList(new HttpResponse(toStream(initialResponse), 200));
|
|
||||||
MockHttpRequester requester = new MockHttpRequester(responses);
|
|
||||||
createExtractor(requester);
|
|
||||||
|
|
||||||
extractor.newSearch(1400000000L, 1403600000L, jobLogger);
|
|
||||||
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
assertFalse(extractor.next().isPresent());
|
|
||||||
assertFalse(extractor.hasNext());
|
|
||||||
expectThrows(NoSuchElementException.class, () -> extractor.next());
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testDataExtractionWithAggregations() throws IOException {
|
|
||||||
aggregations = "{\"my-aggs\": {\"terms\":{\"field\":\"foo\"}}}";
|
|
||||||
|
|
||||||
String initialResponse = "{" + "\"_scroll_id\":\"r2d2bjs2OzM0NDg1ODpzRlBLc0FXNlNyNm5JWUc1\"," + "\"took\":17,"
|
|
||||||
+ "\"timed_out\":false," + "\"_shards\":{" + " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "},"
|
|
||||||
+ "\"aggregations\":{" + " \"my-aggs\":{" + " \"buckets\":[" + " {" + " \"key\":\"foo\"" + " }"
|
|
||||||
+ " ]" + " }" + "}" + "}";
|
|
||||||
|
|
||||||
List<HttpResponse> responses = Arrays.asList(new HttpResponse(toStream(initialResponse), 200));
|
|
||||||
|
|
||||||
MockHttpRequester requester = new MockHttpRequester(responses);
|
|
||||||
createExtractor(requester);
|
|
||||||
|
|
||||||
extractor.newSearch(1400000000L, 1403600000L, jobLogger);
|
|
||||||
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
assertEquals(initialResponse, streamToString(extractor.next().get()));
|
|
||||||
assertFalse(extractor.next().isPresent());
|
|
||||||
assertFalse(extractor.hasNext());
|
|
||||||
|
|
||||||
requester.assertEqualRequestsToResponses();
|
|
||||||
requester.assertResponsesHaveBeenConsumed();
|
|
||||||
|
|
||||||
assertEquals(1, requester.getRequestParams.size());
|
|
||||||
RequestParams requestParams = requester.getGetRequestParams(0);
|
|
||||||
assertEquals("http://localhost:9200/index-*/dataType/_search?scroll=60m&size=0", requestParams.url);
|
|
||||||
String expectedSearchBody = "{" + " \"sort\": [" + " {\"time\": {\"order\": \"asc\"}}" + " ]," + " \"query\": {"
|
|
||||||
+ " \"bool\": {" + " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {"
|
|
||||||
+ " \"time\": {" + " \"gte\": \"1970-01-17T04:53:20.000Z\","
|
|
||||||
+ " \"lt\": \"1970-01-17T05:53:20.000Z\"," + " \"format\": \"date_time\"" + " }"
|
|
||||||
+ " }" + " }" + " ]" + " }" + " }," + " \"aggs\":{\"my-aggs\": {\"terms\":{\"field\":\"foo\"}}}"
|
|
||||||
+ "}";
|
|
||||||
assertEquals(expectedSearchBody.replaceAll(" ", ""), requestParams.requestBody.replaceAll(" ", ""));
|
|
||||||
|
|
||||||
assertEquals("http://localhost:9200/_search/scroll", requester.getDeleteRequestParams(0).url);
|
|
||||||
assertEquals("{\"scroll_id\":[\"r2d2bjs2OzM0NDg1ODpzRlBLc0FXNlNyNm5JWUc1\"]}", requester.getDeleteRequestParams(0).requestBody);
|
|
||||||
assertEquals(1, requester.deleteRequestParams.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testDataExtractionWithAggregations_GivenResponseHasEmptyBuckets() throws IOException {
|
|
||||||
aggregations = "{\"aggs\":{\"my-aggs\": {\"terms\":{\"field\":\"foo\"}}}}";
|
|
||||||
|
|
||||||
String initialResponse = "{" + "\"_scroll_id\":\"r2d2bjs2OzM0NDg1ODpzRlBLc0FXNlNyNm5JWUc1\"," + "\"took\":17,"
|
|
||||||
+ "\"timed_out\":false," + "\"_shards\":{" + " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "},"
|
|
||||||
+ "\"aggregations\":{" + " \"my-aggs\":{" + " \"buckets\":[]" + " }" + "}" + "}";
|
|
||||||
|
|
||||||
List<HttpResponse> responses = Arrays.asList(new HttpResponse(toStream(initialResponse), 200));
|
|
||||||
|
|
||||||
MockHttpRequester requester = new MockHttpRequester(responses);
|
|
||||||
createExtractor(requester);
|
|
||||||
|
|
||||||
extractor.newSearch(1400000000L, 1403600000L, jobLogger);
|
|
||||||
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
assertFalse(extractor.next().isPresent());
|
|
||||||
assertFalse(extractor.hasNext());
|
|
||||||
|
|
||||||
requester.assertEqualRequestsToResponses();
|
|
||||||
requester.assertResponsesHaveBeenConsumed();
|
|
||||||
|
|
||||||
assertEquals(1, requester.getRequestParams.size());
|
|
||||||
RequestParams requestParams = requester.getGetRequestParams(0);
|
|
||||||
assertEquals("http://localhost:9200/index-*/dataType/_search?scroll=60m&size=0", requestParams.url);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testChunkedDataExtraction() throws IOException {
|
|
||||||
String dataSummaryResponse = "{" + "\"took\":17," + "\"timed_out\":false," + "\"_shards\":{" + " \"total\":1,"
|
|
||||||
+ " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":10000," + " \"max_score\":null,"
|
|
||||||
+ " \"hits\":[" + " \"_index\":\"dataIndex\"," + " \"_type\":\"dataType\"," + " \"_id\":\"1403481600\","
|
|
||||||
+ " \"_score\":null," + " \"_source\":{" + " \"id\":\"1403481600\"" + " }" + " ]" + "},"
|
|
||||||
+ "\"aggregations\":{" + "\"earliestTime\":{" + "\"value\":1400000001000," + "\"value_as_string\":\"2014-05-13T16:53:21Z\""
|
|
||||||
+ "}," + "\"latestTime\":{" + "\"value\":1400007201000," + "\"value_as_string\":\"2014-05-13T17:16:01Z\"" + "}" + "}" + "}";
|
|
||||||
|
|
||||||
String indexResponse = "{" + "\"dataIndex\":{" + " \"settings\":{" + " \"index\":{" + " \"creation_date\":0,"
|
|
||||||
+ " \"number_of_shards\":\"5\"," + " \"number_of_replicas\":\"1\"" + " }" + " }" + "}";
|
|
||||||
|
|
||||||
String initialResponse1 = "{" + "\"_scroll_id\":\"scrollId_1\"," + "\"took\":17," + "\"timed_out\":false," + "\"_shards\":{"
|
|
||||||
+ " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":10000,"
|
|
||||||
+ " \"max_score\":null," + " \"hits\":[" + " \"_index\":\"dataIndex\"," + " \"_type\":\"dataType\","
|
|
||||||
+ " \"_id\":\"1403481600\"," + " \"_score\":null," + " \"_source\":{" + " \"id\":\"1403481600\"" + " }"
|
|
||||||
+ " ]" + "}" + "}";
|
|
||||||
|
|
||||||
String continueResponse1 = "{" + "\"_scroll_id\":\"scrollId_2\"," + "\"took\":8," + "\"timed_out\":false," + "\"_shards\":{"
|
|
||||||
+ " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":10000,"
|
|
||||||
+ " \"max_score\":null," + " \"hits\":[" + " \"_index\":\"dataIndex\"," + " \"_type\":\"dataType\","
|
|
||||||
+ " \"_id\":\"1403782200\"," + " \"_score\":null," + " \"_source\":{" + " \"id\":\"1403782200\"" + " }"
|
|
||||||
+ " ]" + "}" + "}";
|
|
||||||
|
|
||||||
String endResponse1 = "{" + "\"_scroll_id\":\"scrollId_3\"," + "\"took\":8," + "\"timed_out\":false," + "\"_shards\":{"
|
|
||||||
+ " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":10000,"
|
|
||||||
+ " \"max_score\":null," + " \"hits\":[]" + "}" + "}";
|
|
||||||
|
|
||||||
String initialResponse2 = "{" + "\"_scroll_id\":\"scrollId_4\"," + "\"hits\":{" + " \"total\":10000," + " \"hits\":["
|
|
||||||
+ " \"_index\":\"dataIndex\"" + " ]" + "}" + "}";
|
|
||||||
|
|
||||||
String endResponse2 = "{" + "\"_scroll_id\":\"scrollId_5\"," + "\"hits\":[]" + "}";
|
|
||||||
|
|
||||||
String initialResponse3 = "{" + "\"_scroll_id\":\"scrollId_6\"," + "\"hits\":[]" + "}";
|
|
||||||
|
|
||||||
String dataSummaryResponse2 = "{" + "\"took\":17," + "\"timed_out\":false," + "\"_shards\":{" + " \"total\":1,"
|
|
||||||
+ " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":1," + " \"max_score\":null,"
|
|
||||||
+ " \"hits\":[" + " \"_index\":\"dataIndex\"," + " \"_type\":\"dataType\"," + " \"_id\":\"1403481600\","
|
|
||||||
+ " \"_score\":null," + " \"_source\":{" + " \"id\":\"1403481600\"" + " }" + " ]" + "},"
|
|
||||||
+ "\"aggregations\":{" + "\"earliestTime\":{" + "\"value\":1400007201000," + "\"value_as_string\":\"2014-05-13T17:16:01Z\""
|
|
||||||
+ "}," + "\"latestTime\":{" + "\"value\":1400007201000," + "\"value_as_string\":\"2014-05-13T17:16:01Z\"" + "}" + "}" + "}";
|
|
||||||
|
|
||||||
String initialResponse4 = "{" + "\"_scroll_id\":\"scrollId_7\"," + "\"hits\":{" + " \"total\":1," + " \"hits\":["
|
|
||||||
+ " \"_index\":\"dataIndex\"" + " ]" + "}" + "}";
|
|
||||||
|
|
||||||
String endResponse4 = "{" + "\"_scroll_id\":\"scrollId_8\"," + "\"hits\":[]" + "}";
|
|
||||||
|
|
||||||
String response5 = "{" + "\"_scroll_id\":\"scrollId_9\"," + "\"hits\":{" + " \"total\":0," + " \"hits\":[]" + "}" + "}";
|
|
||||||
|
|
||||||
String finalDataSummaryResponse = "{" + "\"took\":17," + "\"timed_out\":false," + "\"_shards\":{" + " \"total\":0,"
|
|
||||||
+ " \"successful\":0," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":0," + " \"max_score\":null,"
|
|
||||||
+ " \"hits\":[]" + "}," + "\"aggregations\":{" + "\"earliestTime\":{" + "\"value\": null" + "}," + "\"latestTime\":{"
|
|
||||||
+ "\"value\": null" + "}" + "}" + "}";
|
|
||||||
|
|
||||||
List<HttpResponse> responses = Arrays.asList(new HttpResponse(toStream(dataSummaryResponse), 200),
|
|
||||||
new HttpResponse(toStream(indexResponse), 200), new HttpResponse(toStream(initialResponse1), 200),
|
|
||||||
new HttpResponse(toStream(continueResponse1), 200), new HttpResponse(toStream(endResponse1), 200),
|
|
||||||
new HttpResponse(toStream(initialResponse2), 200), new HttpResponse(toStream(endResponse2), 200),
|
|
||||||
new HttpResponse(toStream(initialResponse3), 200), new HttpResponse(toStream(dataSummaryResponse2), 200),
|
|
||||||
new HttpResponse(toStream(indexResponse), 200), new HttpResponse(toStream(initialResponse4), 200),
|
|
||||||
new HttpResponse(toStream(endResponse4), 200), new HttpResponse(toStream(response5), 200),
|
|
||||||
new HttpResponse(toStream(finalDataSummaryResponse), 200));
|
|
||||||
|
|
||||||
MockHttpRequester requester = new MockHttpRequester(responses);
|
|
||||||
createExtractor(requester);
|
|
||||||
|
|
||||||
extractor.newSearch(1400000000000L, 1407200000000L, jobLogger);
|
|
||||||
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
assertEquals(initialResponse1, streamToString(extractor.next().get()));
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
assertEquals(continueResponse1, streamToString(extractor.next().get()));
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
assertEquals(initialResponse2, streamToString(extractor.next().get()));
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
assertEquals(initialResponse4, streamToString(extractor.next().get()));
|
|
||||||
assertTrue(extractor.hasNext());
|
|
||||||
assertFalse(extractor.next().isPresent());
|
|
||||||
assertFalse(extractor.hasNext());
|
|
||||||
|
|
||||||
requester.assertEqualRequestsToResponses();
|
|
||||||
requester.assertResponsesHaveBeenConsumed();
|
|
||||||
|
|
||||||
int requestCount = 0;
|
|
||||||
RequestParams requestParams = requester.getGetRequestParams(requestCount++);
|
|
||||||
assertEquals("http://localhost:9200/index-*/dataType/_search?size=1", requestParams.url);
|
|
||||||
String expectedDataSummaryBody = "{" + " \"sort\": [{\"_doc\":{\"order\":\"asc\"}}]," + " \"query\": {" + " \"bool\": {"
|
|
||||||
+ " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {" + " \"time\": {"
|
|
||||||
+ " \"gte\": \"2014-05-13T16:53:20.000Z\"," + " \"lt\": \"2014-08-05T00:53:20.000Z\","
|
|
||||||
+ " \"format\": \"date_time\"" + " }" + " }" + " }" + " ]" + " }" + " },"
|
|
||||||
+ " \"aggs\":{" + " \"earliestTime\":{" + " \"min\":{\"field\":\"time\"}" + " }," + " \"latestTime\":{"
|
|
||||||
+ " \"max\":{\"field\":\"time\"}" + " }" + " }" + "}";
|
|
||||||
assertEquals(expectedDataSummaryBody.replace(" ", ""), requestParams.requestBody.replace(" ", ""));
|
|
||||||
|
|
||||||
requestParams = requester.getGetRequestParams(requestCount++);
|
|
||||||
assertEquals("http://localhost:9200/dataIndex/_settings", requestParams.url);
|
|
||||||
assertNull(requestParams.requestBody);
|
|
||||||
|
|
||||||
requestParams = requester.getGetRequestParams(requestCount++);
|
|
||||||
assertEquals("http://localhost:9200/index-*/dataType/_search?scroll=60m&size=1000", requestParams.url);
|
|
||||||
String expectedSearchBody = "{" + " \"sort\": [" + " {\"time\": {\"order\": \"asc\"}}" + " ]," + " \"query\": {"
|
|
||||||
+ " \"bool\": {" + " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {"
|
|
||||||
+ " \"time\": {" + " \"gte\": \"2014-05-13T16:53:21.000Z\","
|
|
||||||
+ " \"lt\": \"2014-05-13T17:53:21.000Z\"," + " \"format\": \"date_time\"" + " }"
|
|
||||||
+ " }" + " }" + " ]" + " }" + " }" + "}";
|
|
||||||
assertEquals(expectedSearchBody.replaceAll(" ", ""), requestParams.requestBody.replaceAll(" ", ""));
|
|
||||||
|
|
||||||
requestParams = requester.getGetRequestParams(requestCount++);
|
|
||||||
assertEquals("http://localhost:9200/_search/scroll?scroll=60m", requestParams.url);
|
|
||||||
assertEquals("scrollId_1", requestParams.requestBody);
|
|
||||||
|
|
||||||
requestParams = requester.getGetRequestParams(requestCount++);
|
|
||||||
assertEquals("http://localhost:9200/_search/scroll?scroll=60m", requestParams.url);
|
|
||||||
assertEquals("scrollId_2", requestParams.requestBody);
|
|
||||||
|
|
||||||
requestParams = requester.getGetRequestParams(requestCount++);
|
|
||||||
expectedSearchBody = "{" + " \"sort\": [" + " {\"time\": {\"order\": \"asc\"}}" + " ]," + " \"query\": {" + " \"bool\": {"
|
|
||||||
+ " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {" + " \"time\": {"
|
|
||||||
+ " \"gte\": \"2014-05-13T17:53:21.000Z\"," + " \"lt\": \"2014-05-13T18:53:21.000Z\","
|
|
||||||
+ " \"format\": \"date_time\"" + " }" + " }" + " }" + " ]" + " }" + " }"
|
|
||||||
+ "}";
|
|
||||||
assertEquals("http://localhost:9200/index-*/dataType/_search?scroll=60m&size=1000", requestParams.url);
|
|
||||||
assertEquals(expectedSearchBody.replaceAll(" ", ""), requestParams.requestBody.replaceAll(" ", ""));
|
|
||||||
|
|
||||||
requestParams = requester.getGetRequestParams(requestCount++);
|
|
||||||
assertEquals("http://localhost:9200/_search/scroll?scroll=60m", requestParams.url);
|
|
||||||
assertEquals("scrollId_4", requestParams.requestBody);
|
|
||||||
|
|
||||||
requestParams = requester.getGetRequestParams(requestCount++);
|
|
||||||
expectedSearchBody = "{" + " \"sort\": [" + " {\"time\": {\"order\": \"asc\"}}" + " ]," + " \"query\": {" + " \"bool\": {"
|
|
||||||
+ " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {" + " \"time\": {"
|
|
||||||
+ " \"gte\": \"2014-05-13T18:53:21.000Z\"," + " \"lt\": \"2014-05-13T19:53:21.000Z\","
|
|
||||||
+ " \"format\": \"date_time\"" + " }" + " }" + " }" + " ]" + " }" + " }"
|
|
||||||
+ "}";
|
|
||||||
assertEquals("http://localhost:9200/index-*/dataType/_search?scroll=60m&size=1000", requestParams.url);
|
|
||||||
assertEquals(expectedSearchBody.replaceAll(" ", ""), requestParams.requestBody.replaceAll(" ", ""));
|
|
||||||
|
|
||||||
requestParams = requester.getGetRequestParams(requestCount++);
|
|
||||||
assertEquals("http://localhost:9200/index-*/dataType/_search?size=1", requestParams.url);
|
|
||||||
expectedDataSummaryBody = "{" + " \"sort\": [{\"_doc\":{\"order\":\"asc\"}}]," + " \"query\": {" + " \"bool\": {"
|
|
||||||
+ " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {" + " \"time\": {"
|
|
||||||
+ " \"gte\": \"2014-05-13T19:53:21.000Z\"," + " \"lt\": \"2014-08-05T00:53:20.000Z\","
|
|
||||||
+ " \"format\": \"date_time\"" + " }" + " }" + " }" + " ]" + " }" + " },"
|
|
||||||
+ " \"aggs\":{" + " \"earliestTime\":{" + " \"min\":{\"field\":\"time\"}" + " }," + " \"latestTime\":{"
|
|
||||||
+ " \"max\":{\"field\":\"time\"}" + " }" + " }" + "}";
|
|
||||||
assertEquals(expectedDataSummaryBody.replace(" ", ""), requestParams.requestBody.replace(" ", ""));
|
|
||||||
|
|
||||||
requestParams = requester.getGetRequestParams(requestCount++);
|
|
||||||
assertEquals("http://localhost:9200/dataIndex/_settings", requestParams.url);
|
|
||||||
assertNull(requestParams.requestBody);
|
|
||||||
|
|
||||||
requestParams = requester.getGetRequestParams(requestCount++);
|
|
||||||
expectedSearchBody = "{" + " \"sort\": [" + " {\"time\": {\"order\": \"asc\"}}" + " ]," + " \"query\": {" + " \"bool\": {"
|
|
||||||
+ " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {" + " \"time\": {"
|
|
||||||
+ " \"gte\": \"2014-05-13T18:53:21.000Z\"," + " \"lt\": \"2014-05-13T18:53:31.000Z\","
|
|
||||||
+ " \"format\": \"date_time\"" + " }" + " }" + " }" + " ]" + " }" + " }"
|
|
||||||
+ "}";
|
|
||||||
assertEquals("http://localhost:9200/index-*/dataType/_search?scroll=60m&size=1000", requestParams.url);
|
|
||||||
assertEquals(expectedSearchBody.replaceAll(" ", ""), requestParams.requestBody.replaceAll(" ", ""));
|
|
||||||
|
|
||||||
requestParams = requester.getGetRequestParams(requestCount++);
|
|
||||||
assertEquals("http://localhost:9200/_search/scroll?scroll=60m", requestParams.url);
|
|
||||||
assertEquals("scrollId_7", requestParams.requestBody);
|
|
||||||
|
|
||||||
requestParams = requester.getGetRequestParams(requestCount++);
|
|
||||||
expectedSearchBody = "{" + " \"sort\": [" + " {\"time\": {\"order\": \"asc\"}}" + " ]," + " \"query\": {" + " \"bool\": {"
|
|
||||||
+ " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {" + " \"time\": {"
|
|
||||||
+ " \"gte\": \"2014-05-13T18:53:31.000Z\"," + " \"lt\": \"2014-05-13T18:53:41.000Z\","
|
|
||||||
+ " \"format\": \"date_time\"" + " }" + " }" + " }" + " ]" + " }" + " }"
|
|
||||||
+ "}";
|
|
||||||
assertEquals("http://localhost:9200/index-*/dataType/_search?scroll=60m&size=1000", requestParams.url);
|
|
||||||
assertEquals(expectedSearchBody.replaceAll(" ", ""), requestParams.requestBody.replaceAll(" ", ""));
|
|
||||||
|
|
||||||
requestParams = requester.getGetRequestParams(requestCount++);
|
|
||||||
assertEquals("http://localhost:9200/index-*/dataType/_search?size=1", requestParams.url);
|
|
||||||
expectedDataSummaryBody = "{" + " \"sort\": [{\"_doc\":{\"order\":\"asc\"}}]," + " \"query\": {" + " \"bool\": {"
|
|
||||||
+ " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {" + " \"time\": {"
|
|
||||||
+ " \"gte\": \"2014-05-13T18:53:41.000Z\"," + " \"lt\": \"2014-08-05T00:53:20.000Z\","
|
|
||||||
+ " \"format\": \"date_time\"" + " }" + " }" + " }" + " ]" + " }" + " },"
|
|
||||||
+ " \"aggs\":{" + " \"earliestTime\":{" + " \"min\":{\"field\":\"time\"}" + " }," + " \"latestTime\":{"
|
|
||||||
+ " \"max\":{\"field\":\"time\"}" + " }" + " }" + "}";
|
|
||||||
assertEquals(expectedDataSummaryBody.replace(" ", ""), requestParams.requestBody.replace(" ", ""));
|
|
||||||
|
|
||||||
assertEquals(requestCount, requester.requestCount);
|
|
||||||
|
|
||||||
String[] deletedScrollIds = { "scrollId_3", "scrollId_5", "scrollId_6", "scrollId_8", "scrollId_9" };
|
|
||||||
assertEquals(5, requester.deleteRequestParams.size());
|
|
||||||
for (int i = 0; i < deletedScrollIds.length; i++) {
|
|
||||||
assertEquals("http://localhost:9200/_search/scroll", requester.getDeleteRequestParams(i).url);
|
|
||||||
assertEquals(String.format(Locale.ROOT, "{\"scroll_id\":[\"%s\"]}", deletedScrollIds[i]),
|
|
||||||
requester.getDeleteRequestParams(i).requestBody);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
    public void testChunkedDataExtraction_GivenZeroHits() throws IOException {
        String dataSummaryResponse = "{" + "\"took\":17," + "\"timed_out\":false," + "\"_shards\":{" + " \"total\":1,"
                + " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":0," + " \"max_score\":null,"
                + " \"hits\":[]" + "}," + "\"aggregations\":{" + "\"earliestTime\":null," + "\"latestTime\":null" + "}" + "}";

        String searchResponse = "{" + "\"_scroll_id\":\"scrollId_1\"," + "\"took\":17," + "\"timed_out\":false," + "\"_shards\":{"
                + " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":0,"
                + " \"max_score\":null," + " \"hits\":[]" + "}" + "}";

        List<HttpResponse> responses = Arrays.asList(new HttpResponse(toStream(dataSummaryResponse), 200),
                new HttpResponse(toStream(searchResponse), 200));

        MockHttpRequester requester = new MockHttpRequester(responses);
        createExtractor(requester);

        extractor.newSearch(1400000000000L, 1407200000000L, jobLogger);

        assertTrue(extractor.hasNext());
        assertFalse(extractor.next().isPresent());
        assertFalse(extractor.hasNext());
    }

    public void testChunkedDataExtraction_GivenDataSummaryRequestIsNotOk() throws IOException {
        String dataSummaryResponse = "{}";
        List<HttpResponse> responses = Arrays.asList(new HttpResponse(toStream(dataSummaryResponse), 400));

        MockHttpRequester requester = new MockHttpRequester(responses);
        createExtractor(requester);

        IOException e = expectThrows(IOException.class, () -> extractor.newSearch(1400000000000L, 1407200000000L, jobLogger));
        assertEquals("Request 'http://localhost:9200/index-*/dataType/_search?size=1' " + "failed with status code: 400. Response was:\n"
                + dataSummaryResponse, e.getMessage());
    }

    public void testChunkedDataExtraction_GivenEmptyDataSummaryResponse() throws IOException {
        String dataSummaryResponse = "{}";
        List<HttpResponse> responses = Arrays.asList(new HttpResponse(toStream(dataSummaryResponse), 200));

        MockHttpRequester requester = new MockHttpRequester(responses);
        createExtractor(requester);

        IOException e = expectThrows(IOException.class, () -> extractor.newSearch(1400000000000L, 1407200000000L, jobLogger));
        assertEquals("Failed to parse string from pattern '\"hits\":\\{.*?\"total\":(.*?),'. Response was:\n" + dataSummaryResponse,
                e.getMessage());
    }

    public void testChunkedDataExtraction_GivenTotalHitsCannotBeParsed() throws IOException {
        String dataSummaryResponse = "{" + "\"hits\":{" + " \"total\":\"NaN\"," + " \"max_score\":null," + " \"hits\":[]" + "}," + "}";

        List<HttpResponse> responses = Arrays.asList(new HttpResponse(toStream(dataSummaryResponse), 200));

        MockHttpRequester requester = new MockHttpRequester(responses);
        createExtractor(requester);

        IOException e = expectThrows(IOException.class, () -> extractor.newSearch(1400000000000L, 1407200000000L, jobLogger));
        assertEquals("Failed to parse long from pattern '\"hits\":\\{.*?\"total\":(.*?),'. Response was:\n" + dataSummaryResponse,
                e.getMessage());
    }

    public void testCancel_GivenChunked() throws IOException {
        String dataSummaryResponse = "{" + "\"took\":17," + "\"timed_out\":false," + "\"_shards\":{" + " \"total\":1,"
                + " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":10000," + " \"max_score\":null,"
                + " \"hits\":[" + " \"_index\":\"dataIndex\"," + " \"_type\":\"dataType\"," + " \"_id\":\"1403481600\","
                + " \"_score\":null," + " \"_source\":{" + " \"id\":\"1403481600\"" + " }" + " ]" + "},"
                + "\"aggregations\":{" + "\"earliestTime\":{" + "\"value\":1400000001000," + "\"value_as_string\":\"2014-05-13T16:53:21Z\""
                + "}," + "\"latestTime\":{" + "\"value\":1400007201000," + "\"value_as_string\":\"2014-05-13T17:16:01Z\"" + "}" + "}" + "}";

        String indexResponse = "{" + "\"dataIndex\":{" + " \"settings\":{" + " \"index\":{" + " \"creation_date\":0,"
                + " \"number_of_shards\":\"5\"," + " \"number_of_replicas\":\"1\"" + " }" + " }" + "}";

        String initialResponse1 = "{" + "\"_scroll_id\":\"scrollId_1\"," + "\"took\":17," + "\"timed_out\":false," + "\"_shards\":{"
                + " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":10000,"
                + " \"max_score\":null," + " \"hits\":[" + " \"_index\":\"dataIndex\"," + " \"_type\":\"dataType\","
                + " \"_id\":\"1403481600\"," + " \"_score\":null," + " \"_source\":{" + " \"id\":\"1403481600\"" + " }"
                + " ]" + "}" + "}";

        String continueResponse1 = "{" + "\"_scroll_id\":\"scrollId_2\"," + "\"took\":8," + "\"timed_out\":false," + "\"_shards\":{"
                + " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":10000,"
                + " \"max_score\":null," + " \"hits\":[" + " \"_index\":\"dataIndex\"," + " \"_type\":\"dataType\","
                + " \"_id\":\"1403782200\"," + " \"_score\":null," + " \"_source\":{" + " \"id\":\"1403782200\"" + " }"
                + " ]" + "}" + "}";

        String endResponse1 = "{" + "\"_scroll_id\":\"scrollId_3\"," + "\"took\":8," + "\"timed_out\":false," + "\"_shards\":{"
                + " \"total\":1," + " \"successful\":1," + " \"failed\":0" + "}," + "\"hits\":{" + " \"total\":10000,"
                + " \"max_score\":null," + " \"hits\":[]" + "}" + "}";

        String initialResponse2 = "{" + "\"_scroll_id\":\"scrollId_4\"," + "\"hits\":{" + " \"total\":10000," + " \"hits\":["
                + " \"_index\":\"dataIndex\"" + " ]" + "}" + "}";

        String endResponse2 = "{" + "\"_scroll_id\":\"scrollId_5\"," + "\"hits\":[]" + "}";

        List<HttpResponse> responses = Arrays.asList(new HttpResponse(toStream(dataSummaryResponse), 200),
                new HttpResponse(toStream(indexResponse), 200), new HttpResponse(toStream(initialResponse1), 200),
                new HttpResponse(toStream(continueResponse1), 200), new HttpResponse(toStream(endResponse1), 200),
                new HttpResponse(toStream(initialResponse2), 200), new HttpResponse(toStream(endResponse2), 200));

        MockHttpRequester requester = new MockHttpRequester(responses);
        createExtractor(requester);

        extractor.newSearch(1400000000000L, 1407200000000L, jobLogger);

        extractor.cancel();

        assertTrue(extractor.hasNext());
        assertEquals(initialResponse1, streamToString(extractor.next().get()));
        assertTrue(extractor.hasNext());
        assertEquals(continueResponse1, streamToString(extractor.next().get()));
        assertTrue(extractor.hasNext());
        assertFalse(extractor.next().isPresent());
        assertFalse(extractor.hasNext());

        assertEquals("http://localhost:9200/_search/scroll", requester.getDeleteRequestParams(0).url);
        assertEquals("{\"scroll_id\":[\"scrollId_3\"]}", requester.getDeleteRequestParams(0).requestBody);
        assertEquals(1, requester.deleteRequestParams.size());

        extractor.newSearch(1407200000000L, 1407203600000L, jobLogger);
        assertTrue(extractor.hasNext());
    }

    private static InputStream toStream(String input) {
        return new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8));
    }

    private static String streamToString(InputStream stream) throws IOException {
        try (BufferedReader buffer = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
            return buffer.lines().collect(Collectors.joining("\n")).trim();
        }
    }

    private void createExtractor(MockHttpRequester httpRequester) {
        ElasticsearchQueryBuilder queryBuilder = new ElasticsearchQueryBuilder(SEARCH, aggregations, scriptFields, TIME_FIELD);
        ElasticsearchUrlBuilder urlBuilder = ElasticsearchUrlBuilder.create(INDEXES, TYPES);
        extractor = new ElasticsearchDataExtractor(httpRequester, urlBuilder, queryBuilder, 1000);
    }

    private static class MockHttpRequester extends HttpRequester {
        private List<HttpResponse> getResponses;
        private List<HttpResponse> deleteResponses;
        private int requestCount = 0;
        private List<RequestParams> getRequestParams;
        private List<RequestParams> deleteRequestParams;

        public MockHttpRequester(List<HttpResponse> responses) {
            getResponses = responses;
            deleteResponses = new ArrayList<>();
            getRequestParams = new ArrayList<>(responses.size());
            deleteRequestParams = new ArrayList<>();
        }

        @Override
        public HttpResponse get(String url, String requestBody) {
            getRequestParams.add(new RequestParams(url, requestBody));
            return getResponses.get(requestCount++);
        }

        @Override
        public HttpResponse delete(String url, String requestBody) {
            deleteRequestParams.add(new RequestParams(url, requestBody));
            HttpResponse response = new HttpResponse(toStream(CLEAR_SCROLL_RESPONSE), 200);
            deleteResponses.add(response);
            return response;
        }

        public RequestParams getGetRequestParams(int callCount) {
            return getRequestParams.get(callCount);
        }

        public RequestParams getDeleteRequestParams(int callCount) {
            return deleteRequestParams.get(callCount);
        }

        public void assertEqualRequestsToResponses() {
            assertEquals(getResponses.size(), getRequestParams.size());
        }

        public void assertResponsesHaveBeenConsumed() throws IOException {
            for (HttpResponse response : getResponses) {
                assertEquals(0, response.getStream().available());
            }
            for (HttpResponse response : deleteResponses) {
                assertEquals(0, response.getStream().available());
            }
        }
    }

    private static class RequestParams {
        public final String url;
        public final String requestBody;

        public RequestParams(String url, String requestBody) {
            this.url = url;
            this.requestBody = requestBody;
        }
    }
}

@@ -1,135 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.prelert.scheduler.http;

import org.apache.logging.log4j.Logger;
import org.elasticsearch.test.ESTestCase;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;

public class ElasticsearchQueryBuilderTests extends ESTestCase {

    private static final String MATCH_ALL_QUERY = "{\"match_all\":{}}";

    public void testCreateSearchBody_GivenQueryOnly() {
        ElasticsearchQueryBuilder queryBuilder = new ElasticsearchQueryBuilder(MATCH_ALL_QUERY, null, null, "time");

        assertFalse(queryBuilder.isAggregated());

        String searchBody = queryBuilder.createSearchBody(1451606400000L, 1451610000000L);

        String expected = "{" + " \"sort\": [" + " {\"time\": {\"order\": \"asc\"}}" + " ]," + " \"query\": {" + " \"bool\": {"
                + " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {" + " \"time\": {"
                + " \"gte\": \"2016-01-01T00:00:00.000Z\"," + " \"lt\": \"2016-01-01T01:00:00.000Z\","
                + " \"format\": \"date_time\"" + " }" + " }" + " }" + " ]" + " }" + " }"
                + "}";
        assertEquals(expected.replaceAll(" ", ""), searchBody.replaceAll(" ", ""));
    }

    public void testCreateSearchBody_GivenQueryAndScriptFields() {
        ElasticsearchQueryBuilder queryBuilder = new ElasticsearchQueryBuilder(MATCH_ALL_QUERY, null,
                "{\"test1\":{\"script\": \"...\"}}", "@timestamp");

        assertFalse(queryBuilder.isAggregated());

        String searchBody = queryBuilder.createSearchBody(1451606400000L, 1451610000000L);

        String expected = "{" + " \"sort\": [" + " {\"@timestamp\": {\"order\": \"asc\"}}" + " ]," + " \"query\": {"
                + " \"bool\": {" + " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {"
                + " \"@timestamp\": {" + " \"gte\": \"2016-01-01T00:00:00.000Z\","
                + " \"lt\": \"2016-01-01T01:00:00.000Z\"," + " \"format\": \"date_time\"" + " }"
                + " }" + " }" + " ]" + " }" + " }," + " \"script_fields\": {\"test1\":{\"script\":\"...\"}}"
                + "}";
        assertEquals(expected.replaceAll(" ", ""), searchBody.replaceAll(" ", ""));
    }

    public void testCreateSearchBody_GivenQueryAndAggs() {
        ElasticsearchQueryBuilder queryBuilder = new ElasticsearchQueryBuilder(MATCH_ALL_QUERY, "{\"my_aggs\":{}}", null, "time");

        assertTrue(queryBuilder.isAggregated());

        String searchBody = queryBuilder.createSearchBody(1451606400000L, 1451610000000L);

        String expected = "{" + " \"sort\": [" + " {\"time\": {\"order\": \"asc\"}}" + " ]," + " \"query\": {" + " \"bool\": {"
                + " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {" + " \"time\": {"
                + " \"gte\": \"2016-01-01T00:00:00.000Z\"," + " \"lt\": \"2016-01-01T01:00:00.000Z\","
                + " \"format\": \"date_time\"" + " }" + " }" + " }" + " ]" + " }" + " },"
                + " \"aggs\":{\"my_aggs\":{}}" + "}";
        assertEquals(expected.replaceAll(" ", ""), searchBody.replaceAll(" ", ""));
    }

    public void testCreateDataSummaryQuery_GivenQueryOnly() {
        ElasticsearchQueryBuilder queryBuilder = new ElasticsearchQueryBuilder(MATCH_ALL_QUERY, null, null, "@timestamp");

        assertFalse(queryBuilder.isAggregated());

        String dataSummaryQuery = queryBuilder.createDataSummaryQuery(1451606400000L, 1451610000000L);

        String expected = "{" + " \"sort\": [" + " {\"_doc\": {\"order\": \"asc\"}}" + " ]," + " \"query\": {" + " \"bool\": {"
                + " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {"
                + " \"@timestamp\": {" + " \"gte\": \"2016-01-01T00:00:00.000Z\","
                + " \"lt\": \"2016-01-01T01:00:00.000Z\"," + " \"format\": \"date_time\"" + " }"
                + " }" + " }" + " ]" + " }" + " }," + " \"aggs\":{" + " \"earliestTime\":{"
                + " \"min\":{\"field\":\"@timestamp\"}" + " }," + " \"latestTime\":{"
                + " \"max\":{\"field\":\"@timestamp\"}" + " }" + " }" + "}";
        assertEquals(expected.replaceAll(" ", ""), dataSummaryQuery.replaceAll(" ", ""));
    }

    public void testCreateDataSummaryQuery_GivenQueryAndScriptFields() {
        ElasticsearchQueryBuilder queryBuilder = new ElasticsearchQueryBuilder(MATCH_ALL_QUERY, null,
                "{\"test1\":{\"script\": \"...\"}}", "@timestamp");

        assertFalse(queryBuilder.isAggregated());

        String dataSummaryQuery = queryBuilder.createDataSummaryQuery(1451606400000L, 1451610000000L);

        String expected = "{" + " \"sort\": [" + " {\"_doc\": {\"order\": \"asc\"}}" + " ]," + " \"query\": {" + " \"bool\": {"
                + " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {"
                + " \"@timestamp\": {" + " \"gte\": \"2016-01-01T00:00:00.000Z\","
                + " \"lt\": \"2016-01-01T01:00:00.000Z\"," + " \"format\": \"date_time\"" + " }"
                + " }" + " }" + " ]" + " }" + " }," + " \"aggs\":{" + " \"earliestTime\":{"
                + " \"min\":{\"field\":\"@timestamp\"}" + " }," + " \"latestTime\":{"
                + " \"max\":{\"field\":\"@timestamp\"}" + " }" + " }" + "}";
        assertEquals(expected.replaceAll(" ", ""), dataSummaryQuery.replaceAll(" ", ""));
    }

    public void testCreateDataSummaryQuery_GivenQueryAndAggs() {
        ElasticsearchQueryBuilder queryBuilder = new ElasticsearchQueryBuilder(MATCH_ALL_QUERY, "{\"my_aggs\":{}}", null, "@timestamp");

        assertTrue(queryBuilder.isAggregated());

        String dataSummaryQuery = queryBuilder.createDataSummaryQuery(1451606400000L, 1451610000000L);

        String expected = "{" + " \"sort\": [" + " {\"_doc\": {\"order\": \"asc\"}}" + " ]," + " \"query\": {" + " \"bool\": {"
                + " \"filter\": [" + " {\"match_all\":{}}," + " {" + " \"range\": {"
                + " \"@timestamp\": {" + " \"gte\": \"2016-01-01T00:00:00.000Z\","
                + " \"lt\": \"2016-01-01T01:00:00.000Z\"," + " \"format\": \"date_time\"" + " }"
                + " }" + " }" + " ]" + " }" + " }," + " \"aggs\":{" + " \"earliestTime\":{"
                + " \"min\":{\"field\":\"@timestamp\"}" + " }," + " \"latestTime\":{"
                + " \"max\":{\"field\":\"@timestamp\"}" + " }" + " }" + "}";
        assertEquals(expected.replaceAll(" ", ""), dataSummaryQuery.replaceAll(" ", ""));
    }

    public void testLogQueryInfo_GivenNoAggsNoFields() {
        ElasticsearchQueryBuilder queryBuilder = new ElasticsearchQueryBuilder(MATCH_ALL_QUERY, null, null, "@timestamp");

        Logger logger = mock(Logger.class);
        queryBuilder.logQueryInfo(logger);

        verify(logger).debug("Will retrieve whole _source document from Elasticsearch");
    }

    public void testLogQueryInfo() {
        ElasticsearchQueryBuilder queryBuilder = new ElasticsearchQueryBuilder(MATCH_ALL_QUERY, "{\"my_aggs\":{ \"foo\": \"bar\" }}",
                null, "@timestamp");

        Logger logger = mock(Logger.class);
        queryBuilder.logQueryInfo(logger);

        verify(logger).debug("Will use the following Elasticsearch aggregations: {\"my_aggs\":{ \"foo\": \"bar\" }}");
    }
}

@@ -1,60 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.prelert.scheduler.http;

import org.elasticsearch.test.ESTestCase;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class ElasticsearchUrlBuilderTests extends ESTestCase {

    private static final List<String> SINGLE_INDEX = Arrays.asList("foo-*");
    private static final List<String> TWO_INDEXES = Arrays.asList("index_1", "index_2");
    private static final List<String> EMPTY_TYPES = Collections.emptyList();
    private static final List<String> TWO_TYPES = Arrays.asList("type_1", "type_2");

    public void testBuildIndexSettingsUrl() {
        String url = ElasticsearchUrlBuilder.create(SINGLE_INDEX, TWO_TYPES).buildIndexSettingsUrl("foo");
        assertEquals("http://localhost:9200/foo/_settings", url);
    }

    public void testBuildInitScrollUrl_GivenMultipleIndicesAndTypes() {
        String url = ElasticsearchUrlBuilder.create(TWO_INDEXES, TWO_TYPES).buildInitScrollUrl(5000);
        assertEquals("http://localhost:9200/index_1,index_2/type_1,type_2/_search?scroll=60m&size=5000", url);
    }

    public void testBuildContinueScrollUrl() {
        String url = ElasticsearchUrlBuilder.create(SINGLE_INDEX, TWO_TYPES).buildContinueScrollUrl();
        assertEquals("http://localhost:9200/_search/scroll?scroll=60m", url);
    }

    public void testBuildClearScrollUrl() {
        String url = ElasticsearchUrlBuilder.create(SINGLE_INDEX, TWO_TYPES).buildClearScrollUrl();
        assertEquals("http://localhost:9200/_search/scroll", url);
    }

    public void testBuildSearchSizeOneUrl_GivenMultipleIndicesAndTypes() {
        String url = ElasticsearchUrlBuilder.create(TWO_INDEXES, TWO_TYPES).buildSearchSizeOneUrl();
        assertEquals("http://localhost:9200/index_1,index_2/type_1,type_2/_search?size=1", url);
    }

    public void testBuildSearchSizeOneUrl_GivenMultipleIndicesAndEmptyTypes() {
        String url = ElasticsearchUrlBuilder.create(TWO_INDEXES, EMPTY_TYPES).buildSearchSizeOneUrl();
        assertEquals("http://localhost:9200/index_1,index_2/_search?size=1", url);
    }

    public void testGetBaseUrl_GivenNoEndingSlash() {
        String url = ElasticsearchUrlBuilder.create(SINGLE_INDEX, TWO_TYPES).getBaseUrl();
        assertEquals("http://localhost:9200/", url);
    }

    public void testGetBaseUrl_GivenEndingSlash() {
        String url = ElasticsearchUrlBuilder.create(SINGLE_INDEX, TWO_TYPES).getBaseUrl();
        assertEquals("http://localhost:9200/", url);
    }
}

@@ -1,40 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.prelert.scheduler.http;

import org.elasticsearch.test.ESTestCase;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;

public class HttpResponseTests extends ESTestCase {

    public void testGetResponseAsStream() throws IOException {
        InputStream stream = new ByteArrayInputStream("foo\nbar".getBytes(StandardCharsets.UTF_8));
        HttpResponse response = new HttpResponse(stream, 200);

        assertEquals("foo\nbar", response.getResponseAsString());
        assertEquals(200, response.getResponseCode());
    }

    public void testGetResponseAsStream_GivenStreamThrows() throws IOException {
        InputStream stream = mock(InputStream.class);
        HttpResponse response = new HttpResponse(stream, 200);

        try {
            response.getResponseAsString();
            fail();
        } catch (UncheckedIOException e) {
            verify(stream).close();
        }
    }
}