Reindex max_docs parameter name (#42942)

Previously, a reindex request had two different size specifications in the body:
* Outer level, determining the maximum number of documents to process
* Inside the source element, determining the scroll/batch size.

The outer-level size has now been renamed to max_docs to
avoid confusion and clarify its semantics, with backwards compatibility and
deprecation warnings for using size.
Similarly, the size parameter has been renamed to max_docs for
update-by-query and delete-by-query to keep the three interfaces consistent.

Finally, all three endpoints now support max_docs in both the body and the URL.

Relates #24344
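For illustration, a minimal sketch of the renamed setter as seen from the Java high-level client side (the request classes and the setMaxDocs/setSize methods are taken from the diff below; the index names and surrounding code are assumptions):

["source","java"]
--------------------------------------------------
import org.elasticsearch.index.reindex.DeleteByQueryRequest;
import org.elasticsearch.index.reindex.ReindexRequest;
import org.elasticsearch.index.reindex.UpdateByQueryRequest;

public class MaxDocsRenameSketch {
    public static void main(String[] args) {
        // Reindex at most 10 documents from "twitter" into "new_twitter".
        ReindexRequest reindex = new ReindexRequest();
        reindex.setSourceIndices("twitter");
        reindex.setDestIndex("new_twitter");
        reindex.setMaxDocs(10);   // new name for the outer-level limit
        // reindex.setSize(10);   // still compiles, but is now deprecated

        // The same rename applies to update- and delete-by-query.
        UpdateByQueryRequest updateByQuery = new UpdateByQueryRequest("twitter");
        updateByQuery.setMaxDocs(10);

        DeleteByQueryRequest deleteByQuery = new DeleteByQueryRequest("twitter");
        deleteByQuery.setMaxDocs(10);
    }
}
--------------------------------------------------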
Henning Andersen 2019-06-07 12:16:36 +02:00 committed by GitHub
parent 5929803413
commit dea935ac31
34 changed files with 825 additions and 91 deletions

View File

@ -585,8 +585,8 @@ final class RequestConverters {
if (updateByQueryRequest.getScrollTime() != AbstractBulkByScrollRequest.DEFAULT_SCROLL_TIMEOUT) {
params.putParam("scroll", updateByQueryRequest.getScrollTime());
}
if (updateByQueryRequest.getSize() > 0) {
params.putParam("size", Integer.toString(updateByQueryRequest.getSize()));
if (updateByQueryRequest.getMaxDocs() > 0) {
params.putParam("max_docs", Integer.toString(updateByQueryRequest.getMaxDocs()));
}
request.addParameters(params.asMap());
request.setEntity(createEntity(updateByQueryRequest, REQUEST_BODY_CONTENT_TYPE));
@ -613,8 +613,8 @@ final class RequestConverters {
if (deleteByQueryRequest.getScrollTime() != AbstractBulkByScrollRequest.DEFAULT_SCROLL_TIMEOUT) {
params.putParam("scroll", deleteByQueryRequest.getScrollTime());
}
if (deleteByQueryRequest.getSize() > 0) {
params.putParam("size", Integer.toString(deleteByQueryRequest.getSize()));
if (deleteByQueryRequest.getMaxDocs() > 0) {
params.putParam("max_docs", Integer.toString(deleteByQueryRequest.getMaxDocs()));
}
request.addParameters(params.asMap());
request.setEntity(createEntity(deleteByQueryRequest, REQUEST_BODY_CONTENT_TYPE));
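A minimal sketch of what this converter change means on the wire, assuming an `UpdateByQueryRequest` against a `twitter` index (the rendered URL is illustrative; `RequestConverters` itself is package-private and not called directly here):

["source","java"]
--------------------------------------------------
import org.elasticsearch.index.reindex.UpdateByQueryRequest;

public class MaxDocsUrlParamSketch {
    public static void main(String[] args) {
        UpdateByQueryRequest updateByQuery = new UpdateByQueryRequest("twitter");
        updateByQuery.setMaxDocs(10);
        // With the converter change above, the high-level client now issues
        // (illustrative): POST /twitter/_update_by_query?max_docs=10
        // Previously the same limit was sent as ?size=10.
        // When no limit is set (getMaxDocs() == -1), neither parameter is added.
    }
}
--------------------------------------------------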

View File

@ -436,7 +436,11 @@ public class RequestConvertersTests extends ESTestCase {
reindexRequest.setDestRouting("=cat");
}
if (randomBoolean()) {
reindexRequest.setSize(randomIntBetween(100, 1000));
if (randomBoolean()) {
reindexRequest.setMaxDocs(randomIntBetween(100, 1000));
} else {
reindexRequest.setSize(randomIntBetween(100, 1000));
}
}
if (randomBoolean()) {
reindexRequest.setAbortOnVersionConflict(false);
@ -488,8 +492,12 @@ public class RequestConvertersTests extends ESTestCase {
}
if (randomBoolean()) {
int size = randomIntBetween(100, 1000);
updateByQueryRequest.setSize(size);
expectedParams.put("size", Integer.toString(size));
if (randomBoolean()) {
updateByQueryRequest.setMaxDocs(size);
} else {
updateByQueryRequest.setSize(size);
}
expectedParams.put("max_docs", Integer.toString(size));
}
if (randomBoolean()) {
updateByQueryRequest.setAbortOnVersionConflict(false);
@ -538,8 +546,12 @@ public class RequestConvertersTests extends ESTestCase {
}
if (randomBoolean()) {
int size = randomIntBetween(100, 1000);
deleteByQueryRequest.setSize(size);
expectedParams.put("size", Integer.toString(size));
if (randomBoolean()) {
deleteByQueryRequest.setMaxDocs(size);
} else {
deleteByQueryRequest.setSize(size);
}
expectedParams.put("max_docs", Integer.toString(size));
}
if (randomBoolean()) {
deleteByQueryRequest.setAbortOnVersionConflict(false);

View File

@ -824,9 +824,9 @@ public class CRUDDocumentationIT extends ESRestHighLevelClientTestCase {
// tag::reindex-request-conflicts
request.setConflicts("proceed"); // <1>
// end::reindex-request-conflicts
// tag::reindex-request-size
request.setSize(10); // <1>
// end::reindex-request-size
// tag::reindex-request-maxDocs
request.setMaxDocs(10); // <1>
// end::reindex-request-maxDocs
// tag::reindex-request-sourceSize
request.setSourceBatchSize(100); // <1>
// end::reindex-request-sourceSize
@ -1026,9 +1026,9 @@ public class CRUDDocumentationIT extends ESRestHighLevelClientTestCase {
// tag::update-by-query-request-query
request.setQuery(new TermQueryBuilder("user", "kimchy")); // <1>
// end::update-by-query-request-query
// tag::update-by-query-request-size
request.setSize(10); // <1>
// end::update-by-query-request-size
// tag::update-by-query-request-maxDocs
request.setMaxDocs(10); // <1>
// end::update-by-query-request-maxDocs
// tag::update-by-query-request-scrollSize
request.setBatchSize(100); // <1>
// end::update-by-query-request-scrollSize
@ -1148,9 +1148,9 @@ public class CRUDDocumentationIT extends ESRestHighLevelClientTestCase {
// tag::delete-by-query-request-query
request.setQuery(new TermQueryBuilder("user", "kimchy")); // <1>
// end::delete-by-query-request-query
// tag::delete-by-query-request-size
request.setSize(10); // <1>
// end::delete-by-query-request-size
// tag::delete-by-query-request-maxDocs
request.setMaxDocs(10); // <1>
// end::delete-by-query-request-maxDocs
// tag::delete-by-query-request-scrollSize
request.setBatchSize(100); // <1>
// end::delete-by-query-request-scrollSize

View File

@ -51,7 +51,7 @@ otherwise modify the request for matching documents.
include-tagged::{client-reindex-tests}/ReindexDocumentationIT.java[update-by-query-size]
--------------------------------------------------
You can also combine `size` with sorting to limit the documents updated:
You can also combine `maxDocs` with sorting to limit the documents updated:
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------

View File

@ -39,11 +39,11 @@ include-tagged::{doc-tests-file}[{api}-request-query]
--------------------------------------------------
<1> Only copy documents which have field `user` set to `kimchy`
It's also possible to limit the number of processed documents by setting size.
It's also possible to limit the number of processed documents by setting `maxDocs`.
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-request-size]
include-tagged::{doc-tests-file}[{api}-request-maxDocs]
--------------------------------------------------
<1> Only copy 10 documents

View File

@ -65,11 +65,11 @@ include-tagged::{doc-tests-file}[{api}-request-query]
--------------------------------------------------
<1> Only copy documents which have field `user` set to `kimchy`
It's also possible to limit the number of processed documents by setting size.
It's also possible to limit the number of processed documents by setting `maxDocs`.
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-request-size]
include-tagged::{doc-tests-file}[{api}-request-maxDocs]
--------------------------------------------------
<1> Only copy 10 documents
@ -90,7 +90,7 @@ include-tagged::{doc-tests-file}[{api}-request-pipeline]
<1> set pipeline to `my_pipeline`
If you want a particular set of documents from the source index you'll need to use sort. If possible, prefer a more
selective query to size and sort.
selective query to maxDocs and sort.
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------

View File

@ -40,11 +40,11 @@ include-tagged::{doc-tests-file}[{api}-request-query]
--------------------------------------------------
<1> Only copy documents which have field `user` set to `kimchy`
It's also possible to limit the number of processed documents by setting size.
It's also possible to limit the number of processed documents by setting `maxDocs`.
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-request-size]
include-tagged::{doc-tests-file}[{api}-request-maxDocs]
--------------------------------------------------
<1> Only copy 10 documents

View File

@ -571,11 +571,11 @@ sub-request proportionally.
* Due to the nature of `slices` each sub-request won't get a perfectly even
portion of the documents. All documents will be addressed, but some slices may
be larger than others. Expect larger slices to have a more even distribution.
* Parameters like `requests_per_second` and `size` on a request with `slices`
are distributed proportionally to each sub-request. Combine that with the point
above about distribution being uneven and you should conclude that the using
`size` with `slices` might not result in exactly `size` documents being
deleted.
* Parameters like `requests_per_second` and `max_docs` on a request with
`slices` are distributed proportionally to each sub-request. Combine that with
the point above about distribution being uneven and you should conclude that
using `max_docs` with `slices` might not result in exactly `max_docs` documents
being deleted.
* Each sub-request gets a slightly different snapshot of the source index
though these are all taken at approximately the same time.
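As a worked illustration of the proportional split described in the note above (my own arithmetic, mirroring the integer division that `AbstractBulkByScrollRequest#forSlice` applies later in this diff): with `max_docs: 10` and `slices: 3`, each sub-request is capped at 10 / 3 = 3 documents, so at most 9 documents are deleted.

["source","java"]
--------------------------------------------------
public class MaxDocsSliceArithmetic {
    public static void main(String[] args) {
        int maxDocs = 10;
        int slices = 3;
        int perSlice = maxDocs / slices;           // 3 documents per sub-request
        System.out.println(perSlice * slices);     // prints 9, not the requested 10
    }
}
--------------------------------------------------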

View File

@ -190,14 +190,14 @@ not a good idea to rely on this behavior. Instead, make sure that IDs are unique
using a script.
It's also possible to limit the number of processed documents by setting
`size`. This will only copy a single document from `twitter` to
`max_docs`. This will only copy a single document from `twitter` to
`new_twitter`:
[source,js]
--------------------------------------------------
POST _reindex
{
"size": 1,
"max_docs": 1,
"source": {
"index": "twitter"
},
@ -211,14 +211,14 @@ POST _reindex
If you want a particular set of documents from the `twitter` index you'll
need to use `sort`. Sorting makes the scroll less efficient but in some contexts
it's worth it. If possible, prefer a more selective query to `size` and `sort`.
it's worth it. If possible, prefer a more selective query to `max_docs` and `sort`.
This will copy 10000 documents from `twitter` into `new_twitter`:
[source,js]
--------------------------------------------------
POST _reindex
{
"size": 10000,
"max_docs": 10000,
"source": {
"index": "twitter",
"sort": { "date": "desc" }
@ -1111,11 +1111,11 @@ sub-request proportionally.
* Due to the nature of `slices` each sub-request won't get a perfectly even
portion of the documents. All documents will be addressed, but some slices may
be larger than others. Expect larger slices to have a more even distribution.
* Parameters like `requests_per_second` and `size` on a request with `slices`
are distributed proportionally to each sub-request. Combine that with the point
above about distribution being uneven and you should conclude that the using
`size` with `slices` might not result in exactly `size` documents being
reindexed.
* Parameters like `requests_per_second` and `max_docs` on a request with
`slices` are distributed proportionally to each sub-request. Combine that with
the point above about distribution being uneven and you should conclude that
using `max_docs` with `slices` might not result in exactly `max_docs` documents
being reindexed.
* Each sub-request gets a slightly different snapshot of the source index,
though these are all taken at approximately the same time.
@ -1232,7 +1232,7 @@ to load only the existing data into the new index and rename any fields if neede
----------------------------------------------------------------
POST _reindex
{
"size": 10,
"max_docs": 10,
"source": {
"index": "twitter",
"query": {

View File

@ -602,11 +602,11 @@ sub-request proportionally.
* Due to the nature of `slices` each sub-request won't get a perfectly even
portion of the documents. All documents will be addressed, but some slices may
be larger than others. Expect larger slices to have a more even distribution.
* Parameters like `requests_per_second` and `size` on a request with `slices`
are distributed proportionally to each sub-request. Combine that with the point
above about distribution being uneven and you should conclude that the using
`size` with `slices` might not result in exactly `size` documents being
updated.
* Parameters like `requests_per_second` and `max_docs` on a request with
`slices` are distributed proportionally to each sub-request. Combine that with
the point above about distribution being uneven and you should conclude that
using `max_docs` with `slices` might not result in exactly `max_docs` documents
being updated.
* Each sub-request gets a slightly different snapshot of the source index
though these are all taken at approximately the same time.

View File

@ -75,7 +75,7 @@ import static java.util.Collections.emptyList;
import static java.util.Collections.unmodifiableList;
import static org.elasticsearch.action.bulk.BackoffPolicy.exponentialBackoff;
import static org.elasticsearch.common.unit.TimeValue.timeValueNanos;
import static org.elasticsearch.index.reindex.AbstractBulkByScrollRequest.SIZE_ALL_MATCHES;
import static org.elasticsearch.index.reindex.AbstractBulkByScrollRequest.MAX_DOCS_ALL_MATCHES;
import static org.elasticsearch.rest.RestStatus.CONFLICT;
import static org.elasticsearch.search.sort.SortBuilders.fieldSort;
@ -263,8 +263,8 @@ public abstract class AbstractAsyncBulkByScrollAction<Request extends AbstractBu
return;
}
long total = response.getTotalHits();
if (mainRequest.getSize() > 0) {
total = min(total, mainRequest.getSize());
if (mainRequest.getMaxDocs() > 0) {
total = min(total, mainRequest.getMaxDocs());
}
worker.setTotal(total);
AbstractRunnable prepareBulkRequestRunnable = new AbstractRunnable() {
@ -304,9 +304,9 @@ public abstract class AbstractAsyncBulkByScrollAction<Request extends AbstractBu
}
worker.countBatch();
List<? extends ScrollableHitSource.Hit> hits = response.getHits();
if (mainRequest.getSize() != SIZE_ALL_MATCHES) {
// Truncate the hits if we have more than the request size
long remaining = max(0, mainRequest.getSize() - worker.getSuccessfullyProcessed());
if (mainRequest.getMaxDocs() != MAX_DOCS_ALL_MATCHES) {
// Truncate the hits if we have more than the request max docs
long remaining = max(0, mainRequest.getMaxDocs() - worker.getSuccessfullyProcessed());
if (remaining < hits.size()) {
hits = hits.subList(0, (int) remaining);
}
@ -395,7 +395,7 @@ public abstract class AbstractAsyncBulkByScrollAction<Request extends AbstractBu
return;
}
if (mainRequest.getSize() != SIZE_ALL_MATCHES && worker.getSuccessfullyProcessed() >= mainRequest.getSize()) {
if (mainRequest.getMaxDocs() != MAX_DOCS_ALL_MATCHES && worker.getSuccessfullyProcessed() >= mainRequest.getMaxDocs()) {
// We've processed all the requested docs.
refreshAndFinish(emptyList(), emptyList(), false);
return;

View File

@ -19,8 +19,8 @@
package org.elasticsearch.index.reindex;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.Action;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.support.ActiveShardCount;
import org.elasticsearch.client.node.NodeClient;
import org.elasticsearch.common.settings.Settings;
@ -105,6 +105,11 @@ public abstract class AbstractBaseReindexRestHandler<
if (requestsPerSecond != null) {
request.setRequestsPerSecond(requestsPerSecond);
}
if (restRequest.hasParam("max_docs")) {
setMaxDocsValidateIdentical(request, restRequest.paramAsInt("max_docs", -1));
}
return request;
}
@ -170,4 +175,13 @@ public abstract class AbstractBaseReindexRestHandler<
}
return requestsPerSecond;
}
static void setMaxDocsValidateIdentical(AbstractBulkByScrollRequest<?> request, int maxDocs) {
if (request.getMaxDocs() != AbstractBulkByScrollRequest.MAX_DOCS_ALL_MATCHES && request.getMaxDocs() != maxDocs) {
throw new IllegalArgumentException("[max_docs] set to two different values [" + request.getMaxDocs() + "]" +
" and [" + maxDocs + "]");
} else {
request.setMaxDocs(maxDocs);
}
}
}

View File

@ -23,6 +23,7 @@ import org.elasticsearch.action.Action;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
@ -52,7 +53,7 @@ public abstract class AbstractBulkByQueryRestHandler<
SearchRequest searchRequest = internal.getSearchRequest();
try (XContentParser parser = extractRequestSpecificFields(restRequest, bodyConsumers)) {
RestSearchAction.parseSearchRequest(searchRequest, restRequest, parser, internal::setSize);
RestSearchAction.parseSearchRequest(searchRequest, restRequest, parser, size -> setMaxDocsFromSearchSize(internal, size));
}
searchRequest.source().size(restRequest.paramAsInt("scroll_size", searchRequest.source().size()));
@ -94,4 +95,9 @@ public abstract class AbstractBulkByQueryRestHandler<
parser.getDeprecationHandler(), BytesReference.bytes(builder.map(body)).streamInput());
}
}
private void setMaxDocsFromSearchSize(Request request, int size) {
LoggingDeprecationHandler.INSTANCE.usedDeprecatedName("size", "max_docs");
setMaxDocsValidateIdentical(request, size);
}
}
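A hedged sketch of the backwards-compatible path this enables, sending the old body field through the low-level REST client (the `RestClient` instance and index name are assumptions; the warning text matches the YAML tests later in this diff):

["source","java"]
--------------------------------------------------
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;

public class DeprecatedSizeBodySketch {
    // The REST layer routes the body-level "size" into max_docs and replies with
    // a deprecation warning: "Deprecated field [size] used, expected [max_docs] instead".
    static void deleteAtMostOne(RestClient lowLevelClient) throws Exception {
        Request request = new Request("POST", "/twitter/_delete_by_query");
        request.setJsonEntity("{ \"size\": 1, \"query\": { \"match_all\": {} } }");
        Response response = lowLevelClient.performRequest(request);
        System.out.println(response.getWarnings());   // includes the message above
    }
}
--------------------------------------------------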

View File

@ -59,6 +59,7 @@ public class RestDeleteByQueryAction extends AbstractBulkByQueryRestHandler<Dele
Map<String, Consumer<Object>> consumers = new HashMap<>();
consumers.put("conflicts", o -> internal.setConflicts((String) o));
consumers.put("max_docs", s -> setMaxDocsValidateIdentical(internal, ((Number) s).intValue()));
parseInternalRequest(internal, request, consumers);

View File

@ -97,7 +97,7 @@ public class RestReindexAction extends AbstractBaseReindexRestHandler<ReindexReq
PARSER.declareField(sourceParser::parse, new ParseField("source"), ValueType.OBJECT);
PARSER.declareField((p, v, c) -> destParser.parse(p, v.getDestination(), c), new ParseField("dest"), ValueType.OBJECT);
PARSER.declareInt(ReindexRequest::setSize, new ParseField("size"));
PARSER.declareInt(RestReindexAction::setMaxDocsValidateIdentical, new ParseField("max_docs", "size"));
PARSER.declareField((p, v, c) -> v.setScript(Script.parse(p)), new ParseField("script"),
ValueType.OBJECT);
PARSER.declareString(ReindexRequest::setConflicts, new ParseField("conflicts"));

View File

@ -67,6 +67,7 @@ public class RestUpdateByQueryAction extends AbstractBulkByQueryRestHandler<Upda
Map<String, Consumer<Object>> consumers = new HashMap<>();
consumers.put("conflicts", o -> internal.setConflicts((String) o));
consumers.put("script", o -> internal.setScript(parseScript(o)));
consumers.put("max_docs", s -> setMaxDocsValidateIdentical(internal, ((Number) s).intValue()));
parseInternalRequest(internal, request, consumers);

View File

@ -121,7 +121,7 @@ public class ReindexDocumentationIT extends ESIntegTestCase {
new UpdateByQueryRequestBuilder(client, UpdateByQueryAction.INSTANCE);
updateByQuery.source("source_index")
.filter(QueryBuilders.termQuery("level", "awesome"))
.size(1000)
.maxDocs(1000)
.script(new Script(ScriptType.INLINE,
"ctx._source.awesome = 'absolutely'",
"painless",
@ -144,7 +144,7 @@ public class ReindexDocumentationIT extends ESIntegTestCase {
UpdateByQueryRequestBuilder updateByQuery =
new UpdateByQueryRequestBuilder(client, UpdateByQueryAction.INSTANCE);
updateByQuery.source("source_index")
.size(100)
.maxDocs(100)
.source()
.addSort("cat", SortOrder.DESC);
BulkByScrollResponse response = updateByQuery.get();

View File

@ -58,8 +58,8 @@ public class ReindexBasicTests extends ReindexTestCase {
assertThat(copy.get(), matcher().created(2));
assertHitCount(client().prepareSearch("dest_half").setSize(0).get(), 2);
// Limit with size
copy = reindex().source("source").destination("dest_size_one", "type").size(1).refresh(true);
// Limit with maxDocs
copy = reindex().source("source").destination("dest_size_one", "type").maxDocs(1).refresh(true);
assertThat(copy.get(), matcher().created(1));
assertHitCount(client().prepareSearch("dest_size_one").setSize(0).get(), 1);
}
@ -86,7 +86,7 @@ public class ReindexBasicTests extends ReindexTestCase {
copy = reindex().source("source").destination("dest_half", "type").refresh(true);
// Use a small batch size so we have to use more than one batch
copy.source().setSize(5);
copy.size(half); // The real "size" of the request.
copy.maxDocs(half);
assertThat(copy.get(), matcher().created(half).batches(half, 5));
assertHitCount(client().prepareSearch("dest_half").setSize(0).get(), half);
}
@ -116,7 +116,7 @@ public class ReindexBasicTests extends ReindexTestCase {
copy = reindex().source("source").destination("dest_half", "type").refresh(true).setSlices(slices);
// Use a small batch size so we have to use more than one batch
copy.source().setSize(5);
copy.size(half); // The real "size" of the request.
copy.maxDocs(half);
BulkByScrollResponse response = copy.get();
assertThat(response, matcher().created(lessThanOrEqualTo((long) half)).slices(hasSize(expectedSlices)));
assertHitCount(client().prepareSearch("dest_half").setSize(0).get(), response.getCreated());

View File

@ -122,7 +122,11 @@ public class RoundTripTests extends ESTestCase {
request.getSearchRequest().indices("test");
request.getSearchRequest().source().size(between(1, 1000));
if (randomBoolean()) {
request.setSize(between(1, Integer.MAX_VALUE));
if (randomBoolean()) {
request.setMaxDocs(between(1, Integer.MAX_VALUE));
} else {
request.setSize(between(1, Integer.MAX_VALUE));
}
}
request.setAbortOnVersionConflict(random().nextBoolean());
request.setRefresh(rarely());

View File

@ -285,6 +285,56 @@
---
"Limit by size":
- skip:
version: " - 7.1.99"
reason: "deprecation warnings only emitted on 7.2+"
features: warnings
- do:
index:
index: twitter
id: 1
body: { "user": "kimchy" }
- do:
index:
index: twitter
id: 2
body: { "user": "kimchy" }
- do:
indices.refresh: {}
- do:
warnings:
- Deprecated field [size] used, expected [max_docs] instead
delete_by_query:
index: twitter
size: 1
body:
query:
match_all: {}
- match: {deleted: 1}
- match: {version_conflicts: 0}
- match: {batches: 1}
- match: {failures: []}
- match: {throttled_millis: 0}
- gte: { took: 0 }
- do:
indices.refresh: {}
- do:
count:
index: twitter
- match: {count: 1}
---
"Limit by size pre 7.2":
- skip:
version: "7.2.0 - "
reason: "7.2 should use max_docs or get deprecation warning"
- do:
index:
index: twitter
@ -322,6 +372,93 @@
- match: {count: 1}
---
"Limit by max_docs in URL":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: twitter
id: 1
body: { "user": "kimchy" }
- do:
index:
index: twitter
id: 2
body: { "user": "kimchy" }
- do:
indices.refresh: {}
- do:
delete_by_query:
index: twitter
max_docs: 1
body:
query:
match_all: {}
- match: {deleted: 1}
- match: {version_conflicts: 0}
- match: {batches: 1}
- match: {failures: []}
- match: {throttled_millis: 0}
- gte: { took: 0 }
- do:
indices.refresh: {}
- do:
count:
index: twitter
- match: {count: 1}
---
"Limit by max_docs in body":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: twitter
id: 1
body: { "user": "kimchy" }
- do:
index:
index: twitter
id: 2
body: { "user": "kimchy" }
- do:
indices.refresh: {}
- do:
delete_by_query:
index: twitter
body:
max_docs: 1
query:
match:
user: "kimchy"
- match: {deleted: 1}
- match: {version_conflicts: 0}
- match: {batches: 1}
- match: {failures: []}
- match: {throttled_millis: 0}
- gte: { took: 0 }
- do:
indices.refresh: {}
- do:
count:
index: twitter
- match: {count: 1}
---
"Can override scroll_size":
- do:

View File

@ -38,7 +38,7 @@
id: 1
body: { "text": "test" }
- do:
catch: /\[size\] parameter cannot be negative, found \[-4\]/
catch: /\[max_docs\] parameter cannot be negative, found \[-4\]/
delete_by_query:
index: test
size: -4
@ -46,6 +46,47 @@
query:
match_all: {}
---
"invalid max_docs fails":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: test
id: 1
body: { "text": "test" }
- do:
catch: /\[max_docs\] parameter cannot be negative, found \[-4\]/
delete_by_query:
index: test
max_docs: -4
body:
query:
match_all: {}
---
"both max_docs and size fails":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: test
id: 1
body: { "text": "test" }
- do:
catch: /\[max_docs\] set to two different values \[4\] and \[5\]/
delete_by_query:
index: test
size: 4
max_docs: 5
body:
query:
match_all: {}
---
"invalid scroll_size fails":
- do:

View File

@ -102,7 +102,7 @@
id: 1
body: { "text": "test" }
- do:
catch: /\[size\] parameter cannot be negative, found \[-4\]/
catch: /\[max_docs\] parameter cannot be negative, found \[-4\]/
reindex:
body:
source:
@ -111,6 +111,92 @@
index: dest
size: -4
---
"invalid max_docs in body fails":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: test
id: 1
body: { "text": "test" }
- do:
catch: /\[max_docs\] parameter cannot be negative, found \[-4\]/
reindex:
body:
source:
index: test
dest:
index: dest
max_docs: -4
---
"invalid max_docs in URL fails":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: test
id: 1
body: { "text": "test" }
- do:
catch: /\[max_docs\] parameter cannot be negative, found \[-4\]/
reindex:
max_docs: -4
body:
source:
index: test
dest:
index: dest
---
"inconsistent max_docs and size fails":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: test
id: 1
body: { "text": "test" }
- do:
catch: /\[max_docs\] set to two different values \[4\] and \[5\]/
reindex:
body:
source:
index: test
dest:
index: dest
size: 4
max_docs: 5
---
"inconsistent max_docs in body and max_docs in URL fails":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: test
id: 1
body: { "text": "test" }
- do:
catch: /\[max_docs\] set to two different values \[4\] and \[5\]/
reindex:
max_docs: 5
body:
source:
index: test
dest:
index: dest
max_docs: 4
---
"requests_per_second cannot be an empty string":
- do:

View File

@ -33,6 +33,56 @@
---
"Sorting and size combined":
- skip:
version: " - 7.1.99"
reason: "deprecation warnings only emitted on 7.2+"
features: warnings
- do:
index:
index: test
id: 1
body: { "order": 1 }
- do:
index:
index: test
id: 2
body: { "order": 2 }
- do:
indices.refresh: {}
- do:
warnings:
- Deprecated field [size] used, expected [max_docs] instead
reindex:
refresh: true
body:
size: 1
source:
index: test
sort: order
dest:
index: target
- do:
search:
rest_total_hits_as_int: true
index: target
- match: { hits.total: 1 }
- do:
search:
rest_total_hits_as_int: true
index: target
q: order:1
- match: { hits.total: 1 }
---
"Sorting and size combined pre 7.2":
- skip:
version: "7.2.0 - "
reason: "7.2 should use max_docs or get deprecation warning"
- do:
index:
index: test
@ -69,3 +119,81 @@
index: target
q: order:1
- match: { hits.total: 1 }
---
"Sorting and max_docs in body combined":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: test
id: 1
body: { "order": 1 }
- do:
index:
index: test
id: 2
body: { "order": 2 }
- do:
indices.refresh: {}
- do:
reindex:
refresh: true
body:
max_docs: 1
source:
index: test
sort: order
dest:
index: target
- do:
search:
rest_total_hits_as_int: true
index: target
- match: { hits.total: 1 }
- do:
search:
rest_total_hits_as_int: true
index: target
q: order:1
- match: { hits.total: 1 }
---
"max_docs in URL":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: test
id: 1
body: { "value": 17 }
- do:
index:
index: test
id: 2
body: { "value": 17 }
- do:
indices.refresh: {}
- do:
reindex:
refresh: true
max_docs: 1
body:
source:
index: test
dest:
index: target
- do:
search:
rest_total_hits_as_int: true
index: target
- match: { hits.total: 1 }

View File

@ -219,6 +219,10 @@
---
"Reindex from remote with size":
- skip:
version: "7.2.0 - "
reason: "7.2 should use max_docs or get deprecation warning"
- do:
index:
index: source
@ -279,6 +283,72 @@
metric: search
- match: {indices.source.total.search.open_contexts: 0}
---
"Reindex from remote with max_docs":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: source
id: 1
body: { "text": "test" }
refresh: true
- do:
index:
index: source
id: 2
body: { "text": "test" }
refresh: true
# Fetch the http host. We use the host of the master because we know there will always be a master.
- do:
cluster.state: {}
- set: { master_node: master }
- do:
nodes.info:
metric: [ http ]
- is_true: nodes.$master.http.publish_address
- set: {nodes.$master.http.publish_address: host}
- do:
reindex:
refresh: true
body:
max_docs: 1
source:
remote:
host: http://${host}
index: source
dest:
index: dest
- match: {created: 1}
- match: {updated: 0}
- match: {version_conflicts: 0}
- match: {batches: 1}
- match: {failures: []}
- match: {throttled_millis: 0}
- gte: { took: 0 }
- is_false: task
- is_false: deleted
- do:
search:
rest_total_hits_as_int: true
index: dest
body:
query:
match:
text: test
- match: {hits.total: 1}
# Make sure reindex closed all the scroll contexts
- do:
indices.stats:
index: source
metric: search
- match: {indices.source.total.search.open_contexts: 0}
---
"Reindex from remote with broken query":
- do:

View File

@ -222,6 +222,43 @@
---
"Limit by size":
- skip:
version: " - 7.1.99"
reason: "deprecation warnings only emitted on 7.2+"
features: warnings
- do:
index:
index: twitter
id: 1
body: { "user": "kimchy" }
- do:
index:
index: twitter
id: 2
body: { "user": "kimchy" }
- do:
indices.refresh: {}
- do:
warnings:
- Deprecated field [size] used, expected [max_docs] instead
update_by_query:
index: twitter
size: 1
- match: {updated: 1}
- match: {version_conflicts: 0}
- match: {batches: 1}
- match: {failures: []}
- match: {throttled_millis: 0}
- gte: { took: 0 }
---
"Limit by size pre 7.2":
- skip:
version: "7.2.0 - "
reason: "7.2 should use max_docs or get deprecation warning"
- do:
index:
index: twitter
@ -246,6 +283,70 @@
- match: {throttled_millis: 0}
- gte: { took: 0 }
---
"Limit by max_docs in URL":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: twitter
id: 1
body: { "user": "kimchy" }
- do:
index:
index: twitter
id: 2
body: { "user": "kimchy" }
- do:
indices.refresh: {}
- do:
update_by_query:
index: twitter
max_docs: 1
- match: {updated: 1}
- match: {version_conflicts: 0}
- match: {batches: 1}
- match: {failures: []}
- match: {throttled_millis: 0}
- gte: { took: 0 }
---
"Limit by max_docs in body":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: twitter
id: 1
body: { "user": "kimchy" }
- do:
index:
index: twitter
id: 2
body: { "user": "kimchy" }
- do:
indices.refresh: {}
- do:
update_by_query:
index: twitter
body:
max_docs: 1
query:
match:
user: "kimchy"
- match: {updated: 1}
- match: {version_conflicts: 0}
- match: {batches: 1}
- match: {failures: []}
- match: {throttled_millis: 0}
- gte: { took: 0 }
---
"Can override scroll_size":
- do:

View File

@ -19,11 +19,88 @@
id: 1
body: { "text": "test" }
- do:
catch: /\[size\] parameter cannot be negative, found \[-4\]/
catch: /\[max_docs\] parameter cannot be negative, found \[-4\]/
update_by_query:
index: test
size: -4
---
"invalid max_docs in URL fails":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: test
id: 1
body: { "text": "test" }
- do:
catch: /\[max_docs\] parameter cannot be negative, found \[-4\]/
update_by_query:
index: test
max_docs: -4
---
"invalid max_docs in body fails":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: test
id: 1
body: { "text": "test" }
- do:
catch: /\[max_docs\] parameter cannot be negative, found \[-4\]/
update_by_query:
index: test
body:
max_docs: -4
---
"inconsistent max_docs and size fails":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: test
id: 1
body: { "text": "test" }
- do:
catch: /\[max_docs\] set to two different values \[4\] and \[5\]/
delete_by_query:
index: test
size: 4
max_docs: 5
body:
query:
match_all: {}
---
"inconsistent max_docs in body and max_docs in URL fails":
- skip:
version: " - 7.1.99"
reason: "max_docs introduced in 7.2.0"
- do:
index:
index: test
id: 1
body: { "text": "test" }
- do:
catch: /\[max_docs\] set to two different values \[4\] and \[5\]/
delete_by_query:
index: test
max_docs: 5
body:
max_docs: 4
query:
match_all: {}
---
"invalid scroll_size fails":
- do:

View File

@ -99,7 +99,11 @@
},
"size": {
"type" : "number",
"description" : "Number of hits to return (default: 10)"
"description" : "Deprecated, please use `max_docs` instead"
},
"max_docs": {
"type" : "number",
"description" : "Maximum number of documents to process (default: all documents)"
},
"sort": {
"type" : "list",

View File

@ -39,6 +39,10 @@
"type": "number",
"default": 1,
"description": "The number of slices this task should be divided into. Defaults to 1 meaning the task isn't sliced into subtasks."
},
"max_docs": {
"type" : "number",
"description" : "Maximum number of documents to process (default: all documents)"
}
}
},

View File

@ -103,7 +103,11 @@
},
"size": {
"type" : "number",
"description" : "Number of hits to return (default: 10)"
"description" : "Deprecated, please use `max_docs` instead"
},
"max_docs": {
"type" : "number",
"description" : "Maximum number of documents to process (default: all documents)"
},
"sort": {
"type" : "list",

View File

@ -43,7 +43,12 @@ import static org.elasticsearch.common.unit.TimeValue.timeValueMinutes;
public abstract class AbstractBulkByScrollRequest<Self extends AbstractBulkByScrollRequest<Self>> extends ActionRequest {
public static final int SIZE_ALL_MATCHES = -1;
public static final int MAX_DOCS_ALL_MATCHES = -1;
/**
* @deprecated please use MAX_DOCS_ALL_MATCHES instead.
*/
@Deprecated
public static final int SIZE_ALL_MATCHES = MAX_DOCS_ALL_MATCHES;
public static final TimeValue DEFAULT_SCROLL_TIMEOUT = timeValueMinutes(5);
public static final int DEFAULT_SCROLL_SIZE = 1000;
@ -60,7 +65,7 @@ public abstract class AbstractBulkByScrollRequest<Self extends AbstractBulkByScr
* Maximum number of processed documents. Defaults to -1 meaning process all
* documents.
*/
private int size = SIZE_ALL_MATCHES;
private int maxDocs = MAX_DOCS_ALL_MATCHES;
/**
* Should version conflicts cause aborts? Defaults to true.
@ -152,10 +157,10 @@ public abstract class AbstractBulkByScrollRequest<Self extends AbstractBulkByScr
if (maxRetries < 0) {
e = addValidationError("retries cannot be negative", e);
}
if (false == (size == -1 || size > 0)) {
if (false == (maxDocs == -1 || maxDocs > 0)) {
e = addValidationError(
"size should be greater than 0 if the request is limited to some number of documents or -1 if it isn't but it was ["
+ size + "]",
"maxDocs should be greater than 0 if the request is limited to some number of documents or -1 if it isn't but it was ["
+ maxDocs + "]",
e);
}
if (searchRequest.source().slice() != null && slices != DEFAULT_SLICES) {
@ -167,20 +172,41 @@ public abstract class AbstractBulkByScrollRequest<Self extends AbstractBulkByScr
/**
* Maximum number of processed documents. Defaults to -1 meaning process all
* documents.
* @deprecated please use getMaxDocs() instead.
*/
@Deprecated
public int getSize() {
return size;
return getMaxDocs();
}
/**
* Maximum number of processed documents. Defaults to -1 meaning process all
* documents.
*
* @deprecated please use setMaxDocs(int) instead.
*/
@Deprecated
public Self setSize(int size) {
return setMaxDocs(size);
}
/**
* Maximum number of processed documents. Defaults to -1 meaning process all
* documents.
*/
public Self setSize(int size) {
if (size < 0) {
throw new IllegalArgumentException("[size] parameter cannot be negative, found [" + size + "]");
public int getMaxDocs() {
return maxDocs;
}
/**
* Maximum number of processed documents. Defaults to -1 meaning process all
* documents.
*/
public Self setMaxDocs(int maxDocs) {
if (maxDocs < 0) {
throw new IllegalArgumentException("[max_docs] parameter cannot be negative, found [" + maxDocs + "]");
}
this.size = size;
this.maxDocs = maxDocs;
return self();
}
@ -405,10 +431,10 @@ public abstract class AbstractBulkByScrollRequest<Self extends AbstractBulkByScr
.setRequestsPerSecond(requestsPerSecond / totalSlices)
// Sub requests don't have workers
.setSlices(1);
if (size != -1) {
// Size is split between workers. This means the size might round
if (maxDocs != MAX_DOCS_ALL_MATCHES) {
// maxDocs is split between workers. This means the maxDocs might round
// down!
request.setSize(size == SIZE_ALL_MATCHES ? SIZE_ALL_MATCHES : size / totalSlices);
request.setMaxDocs(maxDocs / totalSlices);
}
// Set the parent task so this task is cancelled if we cancel the parent
request.setParentTask(slicingTask);
@ -426,7 +452,7 @@ public abstract class AbstractBulkByScrollRequest<Self extends AbstractBulkByScr
super.readFrom(in);
searchRequest = new SearchRequest(in);
abortOnVersionConflict = in.readBoolean();
size = in.readVInt();
maxDocs = in.readVInt();
refresh = in.readBoolean();
timeout = in.readTimeValue();
activeShardCount = ActiveShardCount.readFrom(in);
@ -441,7 +467,7 @@ public abstract class AbstractBulkByScrollRequest<Self extends AbstractBulkByScr
super.writeTo(out);
searchRequest.writeTo(out);
out.writeBoolean(abortOnVersionConflict);
out.writeVInt(size);
out.writeVInt(maxDocs);
out.writeBoolean(refresh);
out.writeTimeValue(timeout);
activeShardCount.writeTo(out);
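A minimal sketch of what the deprecated accessors above mean for existing callers, using `DeleteByQueryRequest` as a convenient concrete subclass (the assertions only restate the delegation shown in the diff):

["source","java"]
--------------------------------------------------
import org.elasticsearch.index.reindex.DeleteByQueryRequest;

public class DeprecatedSizeAccessorsSketch {
    public static void main(String[] args) {
        DeleteByQueryRequest request = new DeleteByQueryRequest("twitter");
        request.setSize(100);                  // deprecated; delegates to setMaxDocs(100)
        assert request.getMaxDocs() == 100;    // same underlying max_docs field
        assert request.getSize() == 100;       // getSize() now reads max_docs too
    }
}
--------------------------------------------------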

View File

@ -67,10 +67,22 @@ public abstract class AbstractBulkByScrollRequestBuilder<
}
/**
* The maximum number of documents to attempt.
* Maximum number of processed documents. Defaults to processing all
* documents.
* @deprecated please use maxDocs(int) instead.
*/
@Deprecated
public Self size(int size) {
request.setSize(size);
return maxDocs(size);
}
/**
* Maximum number of processed documents. Defaults to processing all
* documents.
*/
public Self maxDocs(int maxDocs) {
request.setMaxDocs(maxDocs);
return self();
}

View File

@ -322,8 +322,8 @@ public class ReindexRequest extends AbstractBulkIndexByScrollRequest<ReindexRequ
}
{
// Other fields
if (getSize() != -1 || getSize() > 0) {
builder.field("size", getSize());
if (getMaxDocs() != -1) {
builder.field("max_docs", getMaxDocs());
}
if (getScript() != null) {
builder.field("script", getScript());

View File

@ -42,7 +42,11 @@ public abstract class AbstractBulkByScrollRequestTestCase<R extends AbstractBulk
original.setRequestsPerSecond(
randomBoolean() ? Float.POSITIVE_INFINITY : randomValueOtherThanMany(r -> r < 0, ESTestCase::randomFloat));
if (randomBoolean()) {
original.setSize(between(0, Integer.MAX_VALUE));
if (randomBoolean()) {
original.setMaxDocs(between(0, Integer.MAX_VALUE));
} else {
original.setSize(between(0, Integer.MAX_VALUE));
}
}
// it's not important how many slices there are, we just need a number for forSlice
@ -64,8 +68,10 @@ public abstract class AbstractBulkByScrollRequestTestCase<R extends AbstractBulk
assertEquals("slice requests always have a single worker", 1, forSliced.getSlices());
assertEquals("requests_per_second is split between all workers", original.getRequestsPerSecond() / actualSlices,
forSliced.getRequestsPerSecond(), Float.MIN_NORMAL);
assertEquals("size is split evenly between all workers", original.getSize() == AbstractBulkByScrollRequest.SIZE_ALL_MATCHES
? AbstractBulkByScrollRequest.SIZE_ALL_MATCHES : original.getSize() / actualSlices, forSliced.getSize());
assertEquals("max_docs is split evenly between all workers",
original.getMaxDocs() == AbstractBulkByScrollRequest.MAX_DOCS_ALL_MATCHES
? AbstractBulkByScrollRequest.MAX_DOCS_ALL_MATCHES : original.getMaxDocs() / actualSlices,
forSliced.getMaxDocs());
assertEquals(slicingTask, forSliced.getParentTask());
extraForSliceAssertions(original, forSliced);

View File

@ -197,7 +197,7 @@ public class TransportDeleteForecastAction extends HandledTransportAction<Delete
private DeleteByQueryRequest buildDeleteByQuery(String jobId, List<String> forecastsToDelete) {
DeleteByQueryRequest request = new DeleteByQueryRequest()
.setAbortOnVersionConflict(false) //since these documents are not updated, a conflict just means it was deleted previously
.setSize(MAX_FORECAST_TO_SEARCH)
.setMaxDocs(MAX_FORECAST_TO_SEARCH)
.setSlices(5);
request.indices(AnomalyDetectorsIndex.jobResultsAliasedName(jobId));