[reindex] Timeout if sub-requests timeout
Sadly, it isn't easy to simulate a timeout during an integration test, you just have to cause one. Groovy's sleep should do the job.
This commit is contained in:
parent
f3195cb514
commit
b8d931d23c
|
@ -103,7 +103,8 @@ public abstract class AbstractAsyncBulkByScrollAction<Request extends AbstractBu
|
|||
|
||||
protected abstract BulkRequest buildBulk(Iterable<SearchHit> docs);
|
||||
|
||||
protected abstract Response buildResponse(TimeValue took, List<Failure> indexingFailures, List<ShardSearchFailure> searchFailures);
|
||||
protected abstract Response buildResponse(TimeValue took, List<Failure> indexingFailures, List<ShardSearchFailure> searchFailures,
|
||||
boolean timedOut);
|
||||
|
||||
public void start() {
|
||||
initialSearch();
|
||||
|
@ -161,8 +162,13 @@ public abstract class AbstractAsyncBulkByScrollAction<Request extends AbstractBu
|
|||
return;
|
||||
}
|
||||
setScroll(searchResponse.getScrollId());
|
||||
if (searchResponse.getShardFailures() != null && searchResponse.getShardFailures().length > 0) {
|
||||
startNormalTermination(emptyList(), unmodifiableList(Arrays.asList(searchResponse.getShardFailures())));
|
||||
if ( // If any of the shards failed that should abort the request.
|
||||
(searchResponse.getShardFailures() != null && searchResponse.getShardFailures().length > 0)
|
||||
// Timeouts aren't shard failures but we still need to pass them back to the user.
|
||||
|| searchResponse.isTimedOut()
|
||||
) {
|
||||
startNormalTermination(emptyList(), unmodifiableList(Arrays.asList(searchResponse.getShardFailures())),
|
||||
searchResponse.isTimedOut());
|
||||
return;
|
||||
}
|
||||
long total = searchResponse.getHits().totalHits();
|
||||
|
@ -176,7 +182,7 @@ public abstract class AbstractAsyncBulkByScrollAction<Request extends AbstractBu
|
|||
SearchHit[] docs = searchResponse.getHits().getHits();
|
||||
logger.debug("scroll returned [{}] documents with a scroll id of [{}]", docs.length, searchResponse.getScrollId());
|
||||
if (docs.length == 0) {
|
||||
startNormalTermination(emptyList(), emptyList());
|
||||
startNormalTermination(emptyList(), emptyList(), false);
|
||||
return;
|
||||
}
|
||||
task.countBatch();
|
||||
|
@ -266,13 +272,13 @@ public abstract class AbstractAsyncBulkByScrollAction<Request extends AbstractBu
|
|||
destinationIndices.addAll(destinationIndicesThisBatch);
|
||||
|
||||
if (false == failures.isEmpty()) {
|
||||
startNormalTermination(unmodifiableList(failures), emptyList());
|
||||
startNormalTermination(unmodifiableList(failures), emptyList(), false);
|
||||
return;
|
||||
}
|
||||
|
||||
if (mainRequest.getSize() != SIZE_ALL_MATCHES && task.getSuccessfullyProcessed() >= mainRequest.getSize()) {
|
||||
// We've processed all the requested docs.
|
||||
startNormalTermination(emptyList(), emptyList());
|
||||
startNormalTermination(emptyList(), emptyList(), false);
|
||||
return;
|
||||
}
|
||||
startNextScroll();
|
||||
|
@ -311,9 +317,9 @@ public abstract class AbstractAsyncBulkByScrollAction<Request extends AbstractBu
|
|||
failures.add(failure);
|
||||
}
|
||||
|
||||
void startNormalTermination(List<Failure> indexingFailures, List<ShardSearchFailure> searchFailures) {
|
||||
void startNormalTermination(List<Failure> indexingFailures, List<ShardSearchFailure> searchFailures, boolean timedOut) {
|
||||
if (false == mainRequest.isRefresh()) {
|
||||
finishHim(null, indexingFailures, searchFailures);
|
||||
finishHim(null, indexingFailures, searchFailures, timedOut);
|
||||
return;
|
||||
}
|
||||
RefreshRequest refresh = new RefreshRequest();
|
||||
|
@ -321,7 +327,7 @@ public abstract class AbstractAsyncBulkByScrollAction<Request extends AbstractBu
|
|||
client.admin().indices().refresh(refresh, new ActionListener<RefreshResponse>() {
|
||||
@Override
|
||||
public void onResponse(RefreshResponse response) {
|
||||
finishHim(null, indexingFailures, searchFailures);
|
||||
finishHim(null, indexingFailures, searchFailures, timedOut);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -337,7 +343,7 @@ public abstract class AbstractAsyncBulkByScrollAction<Request extends AbstractBu
|
|||
* @param failure if non null then the request failed catastrophically with this exception
|
||||
*/
|
||||
void finishHim(Throwable failure) {
|
||||
finishHim(failure, emptyList(), emptyList());
|
||||
finishHim(failure, emptyList(), emptyList(), false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -346,8 +352,9 @@ public abstract class AbstractAsyncBulkByScrollAction<Request extends AbstractBu
|
|||
* @param failure if non null then the request failed catastrophically with this exception
|
||||
* @param indexingFailures any indexing failures accumulated during the request
|
||||
* @param searchFailures any search failures accumulated during the request
|
||||
* @param timedOut have any of the sub-requests timed out?
|
||||
*/
|
||||
void finishHim(Throwable failure, List<Failure> indexingFailures, List<ShardSearchFailure> searchFailures) {
|
||||
void finishHim(Throwable failure, List<Failure> indexingFailures, List<ShardSearchFailure> searchFailures, boolean timedOut) {
|
||||
String scrollId = scroll.get();
|
||||
if (Strings.hasLength(scrollId)) {
|
||||
/*
|
||||
|
@ -369,7 +376,8 @@ public abstract class AbstractAsyncBulkByScrollAction<Request extends AbstractBu
|
|||
});
|
||||
}
|
||||
if (failure == null) {
|
||||
listener.onResponse(buildResponse(timeValueNanos(System.nanoTime() - startTime.get()), indexingFailures, searchFailures));
|
||||
listener.onResponse(
|
||||
buildResponse(timeValueNanos(System.nanoTime() - startTime.get()), indexingFailures, searchFailures, timedOut));
|
||||
} else {
|
||||
listener.onFailure(failure);
|
||||
}
|
||||
|
|
|
@ -45,16 +45,18 @@ public class BulkIndexByScrollResponse extends ActionResponse implements ToXCont
|
|||
private BulkByScrollTask.Status status;
|
||||
private List<Failure> indexingFailures;
|
||||
private List<ShardSearchFailure> searchFailures;
|
||||
private boolean timedOut;
|
||||
|
||||
public BulkIndexByScrollResponse() {
|
||||
}
|
||||
|
||||
public BulkIndexByScrollResponse(TimeValue took, BulkByScrollTask.Status status, List<Failure> indexingFailures,
|
||||
List<ShardSearchFailure> searchFailures) {
|
||||
List<ShardSearchFailure> searchFailures, boolean timedOut) {
|
||||
this.took = took;
|
||||
this.status = requireNonNull(status, "Null status not supported");
|
||||
this.indexingFailures = indexingFailures;
|
||||
this.searchFailures = searchFailures;
|
||||
this.timedOut = timedOut;
|
||||
}
|
||||
|
||||
public TimeValue getTook() {
|
||||
|
@ -103,6 +105,13 @@ public class BulkIndexByScrollResponse extends ActionResponse implements ToXCont
|
|||
return searchFailures;
|
||||
}
|
||||
|
||||
/**
|
||||
* Did any of the sub-requests that were part of this request timeout?
|
||||
*/
|
||||
public boolean isTimedOut() {
|
||||
return timedOut;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
super.writeTo(out);
|
||||
|
@ -116,6 +125,7 @@ public class BulkIndexByScrollResponse extends ActionResponse implements ToXCont
|
|||
for (ShardSearchFailure failure: searchFailures) {
|
||||
failure.writeTo(out);
|
||||
}
|
||||
out.writeBoolean(timedOut);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -135,11 +145,13 @@ public class BulkIndexByScrollResponse extends ActionResponse implements ToXCont
|
|||
searchFailures.add(readShardSearchFailure(in));
|
||||
}
|
||||
this.searchFailures = unmodifiableList(searchFailures);
|
||||
this.timedOut = in.readBoolean();
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.field("took", took.millis());
|
||||
builder.field("timed_out", timedOut);
|
||||
status.innerXContent(builder, params, false, false);
|
||||
builder.startArray("failures");
|
||||
for (Failure failure: indexingFailures) {
|
||||
|
|
|
@ -36,6 +36,9 @@ public class BulkIndexByScrollResponseContentListener<R extends BulkIndexByScrol
|
|||
@Override
|
||||
protected RestStatus getStatus(R response) {
|
||||
RestStatus status = RestStatus.OK;
|
||||
if (response.isTimedOut()) {
|
||||
status = RestStatus.REQUEST_TIMEOUT;
|
||||
}
|
||||
for (Failure failure : response.getIndexingFailures()) {
|
||||
if (failure.getStatus().getStatus() > status.getStatus()) {
|
||||
status = failure.getStatus();
|
||||
|
|
|
@ -35,8 +35,9 @@ public class ReindexResponse extends BulkIndexByScrollResponse {
|
|||
public ReindexResponse() {
|
||||
}
|
||||
|
||||
public ReindexResponse(TimeValue took, Status status, List<Failure> indexingFailures, List<ShardSearchFailure> searchFailures) {
|
||||
super(took, status, indexingFailures, searchFailures);
|
||||
public ReindexResponse(TimeValue took, Status status, List<Failure> indexingFailures, List<ShardSearchFailure> searchFailures,
|
||||
boolean timedOut) {
|
||||
super(took, status, indexingFailures, searchFailures, timedOut);
|
||||
}
|
||||
|
||||
public long getCreated() {
|
||||
|
@ -46,6 +47,7 @@ public class ReindexResponse extends BulkIndexByScrollResponse {
|
|||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.field("took", getTook());
|
||||
builder.field("timed_out", isTimedOut());
|
||||
getStatus().innerXContent(builder, params, true, false);
|
||||
builder.startArray("failures");
|
||||
for (Failure failure: getIndexingFailures()) {
|
||||
|
|
|
@ -107,7 +107,10 @@ public class RestUpdateByQueryAction extends
|
|||
internalRequest.setSize(internalRequest.getSearchRequest().source().size());
|
||||
internalRequest.setPipeline(request.param("pipeline"));
|
||||
internalRequest.getSearchRequest().source().size(request.paramAsInt("scroll_size", scrollSize));
|
||||
|
||||
// Let the requester set search timeout. It is probably only going to be useful for testing but who knows.
|
||||
if (request.hasParam("search_timeout")) {
|
||||
internalRequest.getSearchRequest().source().timeout(request.paramAsTime("search_timeout", null));
|
||||
}
|
||||
|
||||
execute(request, internalRequest, channel);
|
||||
}
|
||||
|
|
|
@ -191,8 +191,9 @@ public class TransportReindexAction extends HandledTransportAction<ReindexReques
|
|||
}
|
||||
|
||||
@Override
|
||||
protected ReindexResponse buildResponse(TimeValue took, List<Failure> indexingFailures, List<ShardSearchFailure> searchFailures) {
|
||||
return new ReindexResponse(took, task.getStatus(), indexingFailures, searchFailures);
|
||||
protected ReindexResponse buildResponse(TimeValue took, List<Failure> indexingFailures, List<ShardSearchFailure> searchFailures,
|
||||
boolean timedOut) {
|
||||
return new ReindexResponse(took, task.getStatus(), indexingFailures, searchFailures, timedOut);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -96,8 +96,8 @@ public class TransportUpdateByQueryAction extends HandledTransportAction<UpdateB
|
|||
|
||||
@Override
|
||||
protected BulkIndexByScrollResponse buildResponse(TimeValue took, List<Failure> indexingFailures,
|
||||
List<ShardSearchFailure> searchFailures) {
|
||||
return new BulkIndexByScrollResponse(took, task.getStatus(), indexingFailures, searchFailures);
|
||||
List<ShardSearchFailure> searchFailures, boolean timedOut) {
|
||||
return new BulkIndexByScrollResponse(took, task.getStatus(), indexingFailures, searchFailures, timedOut);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -248,15 +248,33 @@ public class AsyncBulkByScrollActionTests extends ESTestCase {
|
|||
*/
|
||||
public void testShardFailuresAbortRequest() throws Exception {
|
||||
ShardSearchFailure shardFailure = new ShardSearchFailure(new RuntimeException("test"));
|
||||
new DummyAbstractAsyncBulkByScrollAction()
|
||||
.onScrollResponse(new SearchResponse(null, scrollId(), 5, 4, randomLong(), new ShardSearchFailure[] { shardFailure }));
|
||||
InternalSearchResponse internalResponse = new InternalSearchResponse(null, null, null, null, false, null);
|
||||
new DummyAbstractAsyncBulkByScrollAction().onScrollResponse(
|
||||
new SearchResponse(internalResponse, scrollId(), 5, 4, randomLong(), new ShardSearchFailure[] { shardFailure }));
|
||||
BulkIndexByScrollResponse response = listener.get();
|
||||
assertThat(response.getIndexingFailures(), emptyCollectionOf(Failure.class));
|
||||
assertThat(response.getSearchFailures(), contains(shardFailure));
|
||||
assertFalse(response.isTimedOut());
|
||||
assertNull(response.getReasonCancelled());
|
||||
assertThat(client.scrollsCleared, contains(scrollId));
|
||||
}
|
||||
|
||||
/**
|
||||
* Mimicks search timeouts.
|
||||
*/
|
||||
public void testSearchTimeoutsAbortRequest() throws Exception {
|
||||
InternalSearchResponse internalResponse = new InternalSearchResponse(null, null, null, null, true, null);
|
||||
new DummyAbstractAsyncBulkByScrollAction()
|
||||
.onScrollResponse(new SearchResponse(internalResponse, scrollId(), 5, 4, randomLong(), new ShardSearchFailure[0]));
|
||||
BulkIndexByScrollResponse response = listener.get();
|
||||
assertThat(response.getIndexingFailures(), emptyCollectionOf(Failure.class));
|
||||
assertThat(response.getSearchFailures(), emptyCollectionOf(ShardSearchFailure.class));
|
||||
assertTrue(response.isTimedOut());
|
||||
assertNull(response.getReasonCancelled());
|
||||
assertThat(client.scrollsCleared, contains(scrollId));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Mimicks bulk indexing failures.
|
||||
*/
|
||||
|
@ -396,7 +414,7 @@ public class AsyncBulkByScrollActionTests extends ESTestCase {
|
|||
public void testCancelBeforeStartNormalTermination() throws Exception {
|
||||
// Refresh or not doesn't matter - we don't try to refresh.
|
||||
mainRequest.setRefresh(usually());
|
||||
cancelTaskCase((DummyAbstractAsyncBulkByScrollAction action) -> action.startNormalTermination(emptyList(), emptyList()));
|
||||
cancelTaskCase((DummyAbstractAsyncBulkByScrollAction action) -> action.startNormalTermination(emptyList(), emptyList(), false));
|
||||
// This wouldn't return if we called refresh - the action would hang waiting for the refresh that we haven't mocked.
|
||||
}
|
||||
|
||||
|
@ -430,8 +448,8 @@ public class AsyncBulkByScrollActionTests extends ESTestCase {
|
|||
|
||||
@Override
|
||||
protected BulkIndexByScrollResponse buildResponse(TimeValue took, List<Failure> indexingFailures,
|
||||
List<ShardSearchFailure> searchFailures) {
|
||||
return new BulkIndexByScrollResponse(took, task.getStatus(), indexingFailures, searchFailures);
|
||||
List<ShardSearchFailure> searchFailures, boolean timedOut) {
|
||||
return new BulkIndexByScrollResponse(took, task.getStatus(), indexingFailures, searchFailures, timedOut);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -102,7 +102,7 @@ public class RoundTripTests extends ESTestCase {
|
|||
|
||||
public void testReindexResponse() throws IOException {
|
||||
ReindexResponse response = new ReindexResponse(timeValueMillis(randomPositiveLong()), randomStatus(), randomIndexingFailures(),
|
||||
randomSearchFailures());
|
||||
randomSearchFailures(), randomBoolean());
|
||||
ReindexResponse tripped = new ReindexResponse();
|
||||
roundTrip(response, tripped);
|
||||
assertResponseEquals(response, tripped);
|
||||
|
@ -110,7 +110,7 @@ public class RoundTripTests extends ESTestCase {
|
|||
|
||||
public void testBulkIndexByScrollResponse() throws IOException {
|
||||
BulkIndexByScrollResponse response = new BulkIndexByScrollResponse(timeValueMillis(randomPositiveLong()), randomStatus(),
|
||||
randomIndexingFailures(), randomSearchFailures());
|
||||
randomIndexingFailures(), randomSearchFailures(), randomBoolean());
|
||||
BulkIndexByScrollResponse tripped = new BulkIndexByScrollResponse();
|
||||
roundTrip(response, tripped);
|
||||
assertResponseEquals(response, tripped);
|
||||
|
|
|
@ -75,6 +75,7 @@
|
|||
index: source
|
||||
dest:
|
||||
index: dest
|
||||
- is_false: timed_out
|
||||
- match: {task: '/.+:\d+/'}
|
||||
- set: {task: task}
|
||||
- is_false: updated
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
- do:
|
||||
update-by-query:
|
||||
index: test
|
||||
- is_false: timed_out
|
||||
- match: {updated: 1}
|
||||
- match: {version_conflicts: 0}
|
||||
- match: {batches: 1}
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
---
|
||||
"Timeout":
|
||||
- do:
|
||||
index:
|
||||
index: twitter
|
||||
type: tweet
|
||||
id: 1
|
||||
body: { "user": "kimchy" }
|
||||
- do:
|
||||
indices.refresh: {}
|
||||
|
||||
- do:
|
||||
catch: request_timeout
|
||||
reindex:
|
||||
refresh: true
|
||||
body:
|
||||
source:
|
||||
index: twitter
|
||||
timeout: 10ms
|
||||
query:
|
||||
script:
|
||||
# Sleep 100x longer than the timeout. That should cause a timeout!
|
||||
# Return true causes the document to try to be collected which is what actually triggers the timeout.
|
||||
script: sleep(1000); return true
|
||||
dest:
|
||||
index: new_twitter
|
||||
- is_true: timed_out
|
||||
- match: {created: 0}
|
||||
- match: {noops: 0}
|
|
@ -0,0 +1,26 @@
|
|||
---
|
||||
"Timeout":
|
||||
- do:
|
||||
index:
|
||||
index: twitter
|
||||
type: tweet
|
||||
id: 1
|
||||
body: { "user": "kimchy" }
|
||||
- do:
|
||||
indices.refresh: {}
|
||||
|
||||
- do:
|
||||
catch: request_timeout
|
||||
update-by-query:
|
||||
index: twitter
|
||||
refresh: true
|
||||
search_timeout: 10ms
|
||||
body:
|
||||
query:
|
||||
script:
|
||||
# Sleep 100x longer than the timeout. That should cause a timeout!
|
||||
# Return true causes the document to try to be collected which is what actually triggers the timeout.
|
||||
script: sleep(1000); return true
|
||||
- is_true: timed_out
|
||||
- match: {updated: 0}
|
||||
- match: {noops: 0}
|
|
@ -105,6 +105,10 @@
|
|||
"options" : ["query_then_fetch", "dfs_query_then_fetch"],
|
||||
"description" : "Search operation type"
|
||||
},
|
||||
"search_timeout": {
|
||||
"type" : "time",
|
||||
"description" : "Explicit timeout for each search request. Defaults to no timeout."
|
||||
},
|
||||
"size": {
|
||||
"type" : "number",
|
||||
"description" : "Number of hits to return (default: 10)"
|
||||
|
|
Loading…
Reference in New Issue