[ML] Retry after broken scroll (elastic/x-pack-elasticsearch#1713)
Original commit: elastic/x-pack-elasticsearch@b4fc329c52
This commit is contained in:
parent
4e085f03b7
commit
1010f73ae7
|
@ -65,7 +65,7 @@ public class DatafeedJobBuilder {
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
// Step 3. Create data extractory factory
|
// Step 3. Create data extractor factory
|
||||||
Consumer<DataCounts> dataCountsHandler = dataCounts -> {
|
Consumer<DataCounts> dataCountsHandler = dataCounts -> {
|
||||||
if (dataCounts.getLatestRecordTimeStamp() != null) {
|
if (dataCounts.getLatestRecordTimeStamp() != null) {
|
||||||
context.latestRecordTimeMs = dataCounts.getLatestRecordTimeStamp().getTime();
|
context.latestRecordTimeMs = dataCounts.getLatestRecordTimeStamp().getTime();
|
||||||
|
|
|
@ -12,6 +12,7 @@ import org.elasticsearch.action.search.SearchRequestBuilder;
|
||||||
import org.elasticsearch.action.search.SearchResponse;
|
import org.elasticsearch.action.search.SearchResponse;
|
||||||
import org.elasticsearch.action.search.SearchScrollAction;
|
import org.elasticsearch.action.search.SearchScrollAction;
|
||||||
import org.elasticsearch.client.Client;
|
import org.elasticsearch.client.Client;
|
||||||
|
import org.elasticsearch.common.inject.internal.Nullable;
|
||||||
import org.elasticsearch.common.logging.Loggers;
|
import org.elasticsearch.common.logging.Loggers;
|
||||||
import org.elasticsearch.common.unit.TimeValue;
|
import org.elasticsearch.common.unit.TimeValue;
|
||||||
import org.elasticsearch.script.Script;
|
import org.elasticsearch.script.Script;
|
||||||
|
@ -50,11 +51,14 @@ class ScrollDataExtractor implements DataExtractor {
|
||||||
private boolean isCancelled;
|
private boolean isCancelled;
|
||||||
private boolean hasNext;
|
private boolean hasNext;
|
||||||
private Long timestampOnCancel;
|
private Long timestampOnCancel;
|
||||||
|
private Long lastTimestamp;
|
||||||
|
private boolean searchHasShardFailure;
|
||||||
|
|
||||||
ScrollDataExtractor(Client client, ScrollDataExtractorContext dataExtractorContext) {
|
ScrollDataExtractor(Client client, ScrollDataExtractorContext dataExtractorContext) {
|
||||||
this.client = Objects.requireNonNull(client);
|
this.client = Objects.requireNonNull(client);
|
||||||
this.context = Objects.requireNonNull(dataExtractorContext);
|
context = Objects.requireNonNull(dataExtractorContext);
|
||||||
this.hasNext = true;
|
hasNext = true;
|
||||||
|
searchHasShardFailure = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -78,16 +82,17 @@ class ScrollDataExtractor implements DataExtractor {
|
||||||
if (!hasNext()) {
|
if (!hasNext()) {
|
||||||
throw new NoSuchElementException();
|
throw new NoSuchElementException();
|
||||||
}
|
}
|
||||||
Optional<InputStream> stream = scrollId == null ? Optional.ofNullable(initScroll()) : Optional.ofNullable(continueScroll());
|
Optional<InputStream> stream = scrollId == null ?
|
||||||
|
Optional.ofNullable(initScroll(context.start)) : Optional.ofNullable(continueScroll());
|
||||||
if (!stream.isPresent()) {
|
if (!stream.isPresent()) {
|
||||||
hasNext = false;
|
hasNext = false;
|
||||||
}
|
}
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
private InputStream initScroll() throws IOException {
|
protected InputStream initScroll(long startTimestamp) throws IOException {
|
||||||
LOGGER.debug("[{}] Initializing scroll", context.jobId);
|
LOGGER.debug("[{}] Initializing scroll", context.jobId);
|
||||||
SearchResponse searchResponse = executeSearchRequest(buildSearchRequest());
|
SearchResponse searchResponse = executeSearchRequest(buildSearchRequest(startTimestamp));
|
||||||
return processSearchResponse(searchResponse);
|
return processSearchResponse(searchResponse);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,7 +100,7 @@ class ScrollDataExtractor implements DataExtractor {
|
||||||
return searchRequestBuilder.get();
|
return searchRequestBuilder.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
private SearchRequestBuilder buildSearchRequest() {
|
private SearchRequestBuilder buildSearchRequest(long start) {
|
||||||
SearchRequestBuilder searchRequestBuilder = SearchAction.INSTANCE.newRequestBuilder(client)
|
SearchRequestBuilder searchRequestBuilder = SearchAction.INSTANCE.newRequestBuilder(client)
|
||||||
.setScroll(SCROLL_TIMEOUT)
|
.setScroll(SCROLL_TIMEOUT)
|
||||||
.addSort(context.extractedFields.timeField(), SortOrder.ASC)
|
.addSort(context.extractedFields.timeField(), SortOrder.ASC)
|
||||||
|
@ -103,7 +108,7 @@ class ScrollDataExtractor implements DataExtractor {
|
||||||
.setTypes(context.types)
|
.setTypes(context.types)
|
||||||
.setSize(context.scrollSize)
|
.setSize(context.scrollSize)
|
||||||
.setQuery(ExtractorUtils.wrapInTimeRangeQuery(
|
.setQuery(ExtractorUtils.wrapInTimeRangeQuery(
|
||||||
context.query, context.extractedFields.timeField(), context.start, context.end));
|
context.query, context.extractedFields.timeField(), start, context.end));
|
||||||
|
|
||||||
for (String docValueField : context.extractedFields.getDocValueFields()) {
|
for (String docValueField : context.extractedFields.getDocValueFields()) {
|
||||||
searchRequestBuilder.addDocValueField(docValueField);
|
searchRequestBuilder.addDocValueField(docValueField);
|
||||||
|
@ -136,6 +141,18 @@ class ScrollDataExtractor implements DataExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
private InputStream processSearchResponse(SearchResponse searchResponse) throws IOException {
|
private InputStream processSearchResponse(SearchResponse searchResponse) throws IOException {
|
||||||
|
if (searchResponse.getFailedShards() > 0 && searchHasShardFailure == false) {
|
||||||
|
// This could be a transient error with the scroll Id.
|
||||||
|
// Reinitialise the scroll and try again but only once.
|
||||||
|
LOGGER.debug("[{}] Resetting scroll search after shard failure", context.jobId);
|
||||||
|
resetScroll();
|
||||||
|
if (lastTimestamp != null) {
|
||||||
|
lastTimestamp++;
|
||||||
|
}
|
||||||
|
searchHasShardFailure = true;
|
||||||
|
return initScroll(lastTimestamp == null ? context.start : lastTimestamp);
|
||||||
|
}
|
||||||
|
|
||||||
ExtractorUtils.checkSearchWasSuccessful(context.jobId, searchResponse);
|
ExtractorUtils.checkSearchWasSuccessful(context.jobId, searchResponse);
|
||||||
scrollId = searchResponse.getScrollId();
|
scrollId = searchResponse.getScrollId();
|
||||||
if (searchResponse.getHits().getHits().length == 0) {
|
if (searchResponse.getHits().getHits().length == 0) {
|
||||||
|
@ -161,6 +178,8 @@ class ScrollDataExtractor implements DataExtractor {
|
||||||
}
|
}
|
||||||
hitProcessor.process(hit);
|
hitProcessor.process(hit);
|
||||||
}
|
}
|
||||||
|
SearchHit lastHit = searchResponse.getHits().getHits()[searchResponse.getHits().getHits().length -1];
|
||||||
|
lastTimestamp = context.extractedFields.timeFieldValue(lastHit);
|
||||||
}
|
}
|
||||||
return new ByteArrayInputStream(outputStream.toByteArray());
|
return new ByteArrayInputStream(outputStream.toByteArray());
|
||||||
}
|
}
|
||||||
|
@ -178,6 +197,11 @@ class ScrollDataExtractor implements DataExtractor {
|
||||||
.get();
|
.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void resetScroll() {
|
||||||
|
clearScroll(scrollId);
|
||||||
|
scrollId = null;
|
||||||
|
}
|
||||||
|
|
||||||
void clearScroll(String scrollId) {
|
void clearScroll(String scrollId) {
|
||||||
ClearScrollAction.INSTANCE.newRequestBuilder(client).addScrollId(scrollId).get();
|
ClearScrollAction.INSTANCE.newRequestBuilder(client).addScrollId(scrollId).get();
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,9 +30,11 @@ import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.Queue;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import static java.util.Collections.emptyMap;
|
import static java.util.Collections.emptyMap;
|
||||||
|
@ -55,10 +57,11 @@ public class ScrollDataExtractorTests extends ESTestCase {
|
||||||
private QueryBuilder query;
|
private QueryBuilder query;
|
||||||
private List<SearchSourceBuilder.ScriptField> scriptFields;
|
private List<SearchSourceBuilder.ScriptField> scriptFields;
|
||||||
private int scrollSize;
|
private int scrollSize;
|
||||||
|
private long initScrollStartTime;
|
||||||
|
|
||||||
private class TestDataExtractor extends ScrollDataExtractor {
|
private class TestDataExtractor extends ScrollDataExtractor {
|
||||||
|
|
||||||
private SearchResponse nextResponse;
|
private Queue<SearchResponse> responses = new LinkedList<>();
|
||||||
|
|
||||||
TestDataExtractor(long start, long end) {
|
TestDataExtractor(long start, long end) {
|
||||||
this(createContext(start, end));
|
this(createContext(start, end));
|
||||||
|
@ -68,16 +71,22 @@ public class ScrollDataExtractorTests extends ESTestCase {
|
||||||
super(client, context);
|
super(client, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected InputStream initScroll(long startTimestamp) throws IOException {
|
||||||
|
initScrollStartTime = startTimestamp;
|
||||||
|
return super.initScroll(startTimestamp);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected SearchResponse executeSearchRequest(SearchRequestBuilder searchRequestBuilder) {
|
protected SearchResponse executeSearchRequest(SearchRequestBuilder searchRequestBuilder) {
|
||||||
capturedSearchRequests.add(searchRequestBuilder);
|
capturedSearchRequests.add(searchRequestBuilder);
|
||||||
return nextResponse;
|
return responses.remove();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected SearchResponse executeSearchScrollRequest(String scrollId) {
|
protected SearchResponse executeSearchScrollRequest(String scrollId) {
|
||||||
capturedContinueScrollIds.add(scrollId);
|
capturedContinueScrollIds.add(scrollId);
|
||||||
return nextResponse;
|
return responses.remove();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -86,7 +95,11 @@ public class ScrollDataExtractorTests extends ESTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
void setNextResponse(SearchResponse searchResponse) {
|
void setNextResponse(SearchResponse searchResponse) {
|
||||||
nextResponse = searchResponse;
|
responses.add(searchResponse);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getInitScrollStartTime() {
|
||||||
|
return initScrollStartTime;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -263,6 +276,7 @@ public class ScrollDataExtractorTests extends ESTestCase {
|
||||||
public void testExtractionGivenInitSearchResponseHasShardFailures() throws IOException {
|
public void testExtractionGivenInitSearchResponseHasShardFailures() throws IOException {
|
||||||
TestDataExtractor extractor = new TestDataExtractor(1000L, 2000L);
|
TestDataExtractor extractor = new TestDataExtractor(1000L, 2000L);
|
||||||
extractor.setNextResponse(createResponseWithShardFailures());
|
extractor.setNextResponse(createResponseWithShardFailures());
|
||||||
|
extractor.setNextResponse(createResponseWithShardFailures());
|
||||||
|
|
||||||
assertThat(extractor.hasNext(), is(true));
|
assertThat(extractor.hasNext(), is(true));
|
||||||
expectThrows(IOException.class, () -> extractor.next());
|
expectThrows(IOException.class, () -> extractor.next());
|
||||||
|
@ -271,12 +285,61 @@ public class ScrollDataExtractorTests extends ESTestCase {
|
||||||
public void testExtractionGivenInitSearchResponseEncounteredUnavailableShards() throws IOException {
|
public void testExtractionGivenInitSearchResponseEncounteredUnavailableShards() throws IOException {
|
||||||
TestDataExtractor extractor = new TestDataExtractor(1000L, 2000L);
|
TestDataExtractor extractor = new TestDataExtractor(1000L, 2000L);
|
||||||
extractor.setNextResponse(createResponseWithUnavailableShards(1));
|
extractor.setNextResponse(createResponseWithUnavailableShards(1));
|
||||||
|
extractor.setNextResponse(createResponseWithUnavailableShards(1));
|
||||||
|
|
||||||
assertThat(extractor.hasNext(), is(true));
|
assertThat(extractor.hasNext(), is(true));
|
||||||
IOException e = expectThrows(IOException.class, () -> extractor.next());
|
IOException e = expectThrows(IOException.class, () -> extractor.next());
|
||||||
assertThat(e.getMessage(), equalTo("[" + jobId + "] Search request encountered [1] unavailable shards"));
|
assertThat(e.getMessage(), equalTo("[" + jobId + "] Search request encountered [1] unavailable shards"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testResetScrollAfterFailure() throws IOException {
|
||||||
|
TestDataExtractor extractor = new TestDataExtractor(1000L, 2000L);
|
||||||
|
|
||||||
|
SearchResponse goodResponse = createSearchResponse(
|
||||||
|
Arrays.asList(1100L, 1200L),
|
||||||
|
Arrays.asList("a1", "a2"),
|
||||||
|
Arrays.asList("b1", "b2")
|
||||||
|
);
|
||||||
|
extractor.setNextResponse(goodResponse);
|
||||||
|
extractor.setNextResponse(createResponseWithShardFailures());
|
||||||
|
extractor.setNextResponse(goodResponse);
|
||||||
|
extractor.setNextResponse(createResponseWithShardFailures());
|
||||||
|
|
||||||
|
// first response is good
|
||||||
|
assertThat(extractor.hasNext(), is(true));
|
||||||
|
Optional<InputStream> output = extractor.next();
|
||||||
|
assertThat(output.isPresent(), is(true));
|
||||||
|
// this should recover from the first shard failure and try again
|
||||||
|
assertThat(extractor.hasNext(), is(true));
|
||||||
|
output = extractor.next();
|
||||||
|
assertThat(output.isPresent(), is(true));
|
||||||
|
// A second failure is not tolerated
|
||||||
|
assertThat(extractor.hasNext(), is(true));
|
||||||
|
expectThrows(IOException.class, () -> extractor.next());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testResetScollUsesLastResultTimestamp() throws IOException {
|
||||||
|
TestDataExtractor extractor = new TestDataExtractor(1000L, 2000L);
|
||||||
|
|
||||||
|
SearchResponse goodResponse = createSearchResponse(
|
||||||
|
Arrays.asList(1100L, 1200L),
|
||||||
|
Arrays.asList("a1", "a2"),
|
||||||
|
Arrays.asList("b1", "b2")
|
||||||
|
);
|
||||||
|
|
||||||
|
extractor.setNextResponse(goodResponse);
|
||||||
|
extractor.setNextResponse(createResponseWithShardFailures());
|
||||||
|
extractor.setNextResponse(createResponseWithShardFailures());
|
||||||
|
|
||||||
|
Optional<InputStream> output = extractor.next();
|
||||||
|
assertThat(output.isPresent(), is(true));
|
||||||
|
assertEquals(1000L, extractor.getInitScrollStartTime());
|
||||||
|
|
||||||
|
expectThrows(IOException.class, () -> extractor.next());
|
||||||
|
// the new start time after error is the last record timestamp +1
|
||||||
|
assertEquals(1201L, extractor.getInitScrollStartTime());
|
||||||
|
}
|
||||||
|
|
||||||
public void testDomainSplitScriptField() throws IOException {
|
public void testDomainSplitScriptField() throws IOException {
|
||||||
|
|
||||||
SearchSourceBuilder.ScriptField withoutSplit = new SearchSourceBuilder.ScriptField(
|
SearchSourceBuilder.ScriptField withoutSplit = new SearchSourceBuilder.ScriptField(
|
||||||
|
@ -369,6 +432,7 @@ public class ScrollDataExtractorTests extends ESTestCase {
|
||||||
when(searchResponse.status()).thenReturn(RestStatus.OK);
|
when(searchResponse.status()).thenReturn(RestStatus.OK);
|
||||||
when(searchResponse.getShardFailures()).thenReturn(
|
when(searchResponse.getShardFailures()).thenReturn(
|
||||||
new ShardSearchFailure[] { new ShardSearchFailure(new RuntimeException("shard failed"))});
|
new ShardSearchFailure[] { new ShardSearchFailure(new RuntimeException("shard failed"))});
|
||||||
|
when(searchResponse.getFailedShards()).thenReturn(1);
|
||||||
return searchResponse;
|
return searchResponse;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -377,6 +441,7 @@ public class ScrollDataExtractorTests extends ESTestCase {
|
||||||
when(searchResponse.status()).thenReturn(RestStatus.OK);
|
when(searchResponse.status()).thenReturn(RestStatus.OK);
|
||||||
when(searchResponse.getSuccessfulShards()).thenReturn(2);
|
when(searchResponse.getSuccessfulShards()).thenReturn(2);
|
||||||
when(searchResponse.getTotalShards()).thenReturn(2 + unavailableShards);
|
when(searchResponse.getTotalShards()).thenReturn(2 + unavailableShards);
|
||||||
|
when(searchResponse.getFailedShards()).thenReturn(unavailableShards);
|
||||||
return searchResponse;
|
return searchResponse;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue