From 6fa258b8fa2efa21fe011e30b3a1653643f61a88 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 19 Aug 2015 16:43:50 +0200 Subject: [PATCH] Deprecate the `scan` search type. This commit deprecates the `scan` search type in favour of regular scroll requests sorted by `_doc`. Related to #12983 --- .../action/search/SearchType.java | 5 +- .../type/TransportSearchScanAction.java | 1 + .../elasticsearch/search/SearchService.java | 1 + .../action/SearchServiceTransportAction.java | 1 + docs/plugins/delete-by-query.asciidoc | 16 +-- docs/reference/migration/index.asciidoc | 1 + .../migration/migrate_2_0/removals.asciidoc | 4 +- docs/reference/migration/migrate_2_1.asciidoc | 27 ++++ docs/reference/search/request-body.asciidoc | 2 +- docs/reference/search/request/scroll.asciidoc | 53 ++------ .../search/request/search-type.asciidoc | 8 +- docs/reference/search/uri-request.asciidoc | 4 +- .../TransportDeleteByQueryAction.java | 119 +++++++++--------- 13 files changed, 121 insertions(+), 121 deletions(-) create mode 100644 docs/reference/migration/migrate_2_1.asciidoc diff --git a/core/src/main/java/org/elasticsearch/action/search/SearchType.java b/core/src/main/java/org/elasticsearch/action/search/SearchType.java index 432e816a9df..6d91e40f243 100644 --- a/core/src/main/java/org/elasticsearch/action/search/SearchType.java +++ b/core/src/main/java/org/elasticsearch/action/search/SearchType.java @@ -54,7 +54,9 @@ public enum SearchType { /** * Performs scanning of the results which executes the search without any sorting. * It will automatically start scrolling the result set. + * @deprecated will be removed in 3.0, you should do a regular scroll instead, ordered by `_doc` */ + @Deprecated SCAN((byte) 4), /** * Only counts the results, will still execute aggregations and the like. 
@@ -69,6 +71,7 @@ public enum SearchType { public static final SearchType DEFAULT = QUERY_THEN_FETCH; private static final ParseField COUNT_VALUE = new ParseField("count").withAllDeprecated("query_then_fetch"); + private static final ParseField SCAN_VALUE = new ParseField("scan").withAllDeprecated("query_then_fetch sorting on `_doc`"); private byte id; @@ -121,7 +124,7 @@ public enum SearchType { return SearchType.QUERY_THEN_FETCH; } else if ("query_and_fetch".equals(searchType)) { return SearchType.QUERY_AND_FETCH; - } else if ("scan".equals(searchType)) { + } else if (parseFieldMatcher.match(searchType, SCAN_VALUE)) { return SearchType.SCAN; } else if (parseFieldMatcher.match(searchType, COUNT_VALUE)) { return SearchType.COUNT; diff --git a/core/src/main/java/org/elasticsearch/action/search/type/TransportSearchScanAction.java b/core/src/main/java/org/elasticsearch/action/search/type/TransportSearchScanAction.java index 6edaf8fad42..c5ea86763f4 100644 --- a/core/src/main/java/org/elasticsearch/action/search/type/TransportSearchScanAction.java +++ b/core/src/main/java/org/elasticsearch/action/search/type/TransportSearchScanAction.java @@ -40,6 +40,7 @@ import org.elasticsearch.threadpool.ThreadPool; import static org.elasticsearch.action.search.type.TransportSearchHelper.buildScrollId; +@Deprecated // remove in 3.0 public class TransportSearchScanAction extends TransportSearchTypeAction { @Inject diff --git a/core/src/main/java/org/elasticsearch/search/SearchService.java b/core/src/main/java/org/elasticsearch/search/SearchService.java index fe85bba2919..15eb3c0e8dc 100644 --- a/core/src/main/java/org/elasticsearch/search/SearchService.java +++ b/core/src/main/java/org/elasticsearch/search/SearchService.java @@ -263,6 +263,7 @@ public class SearchService extends AbstractLifecycleComponent { } } + @Deprecated // remove in 3.0 public QuerySearchResult executeScan(ShardSearchRequest request) { final SearchContext context = createAndPutContext(request); final int 
originalSize = context.size(); diff --git a/core/src/main/java/org/elasticsearch/search/action/SearchServiceTransportAction.java b/core/src/main/java/org/elasticsearch/search/action/SearchServiceTransportAction.java index 5730a023554..4205fd95299 100644 --- a/core/src/main/java/org/elasticsearch/search/action/SearchServiceTransportAction.java +++ b/core/src/main/java/org/elasticsearch/search/action/SearchServiceTransportAction.java @@ -418,6 +418,7 @@ public class SearchServiceTransportAction extends AbstractComponent { } } + @Deprecated // remove in 3.0 class SearchScanTransportHandler implements TransportRequestHandler { @Override public void messageReceived(ShardSearchTransportRequest request, TransportChannel channel) throws Exception { diff --git a/docs/plugins/delete-by-query.asciidoc b/docs/plugins/delete-by-query.asciidoc index a422cc3f29c..a207ae560e5 100644 --- a/docs/plugins/delete-by-query.asciidoc +++ b/docs/plugins/delete-by-query.asciidoc @@ -6,7 +6,7 @@ The delete-by-query plugin adds support for deleting all of the documents replacement for the problematic _delete-by-query_ functionality which has been removed from Elasticsearch core. -Internally, it uses the {ref}/search-request-scroll.html#scroll-scan[Scan/Scroll] +Internally, it uses {ref}/search-request-scroll.html[Scroll] and {ref}/docs-bulk.html[Bulk] APIs to delete documents in an efficient and safe manner. It is slower than the old _delete-by-query_ functionality, but fixes the problems with the previous implementation. @@ -101,7 +101,7 @@ See {ref}/search-uri-request.html[URI search request] for details. `size`:: -The number of hits returned *per shard* by the {ref}/search-request-scroll.html#scroll-scan[scan] +The number of hits returned by the {ref}/search-request-scroll.html[scroll] request. Defaults to 10. May also be specified in the request body. 
`timeout`:: @@ -148,7 +148,7 @@ The JSON response looks like this: -------------------------------------------------- Internally, the query is used to execute an initial -{ref}/search-request-scroll.html#scroll-scan[scroll/scan] request. As hits are +{ref}/search-request-scroll.html[scroll] request. As hits are pulled from the scroll API, they are passed to the {ref}/docs-bulk.html[Bulk API] for deletion. @@ -157,7 +157,7 @@ was visible to search at the time the request was executed. Any documents that have been reindexed or updated during execution will not be deleted. Since documents can be updated or deleted by external operations during the -_scan-scroll-bulk_ process, the plugin keeps track of different counters for +_scroll-bulk_ process, the plugin keeps track of different counters for each index, with the totals displayed under the `_all` index. The counters are as follows: @@ -212,7 +212,7 @@ Resiliency:: === New delete-by-query implementation The new implementation, provided by this plugin, is built internally -using {ref}/search-request-scroll.html#scroll-scan[scan and scroll] to return +using {ref}/search-request-scroll.html[scroll] to return the document IDs and versions of all the documents that need to be deleted. It then uses the {ref}/docs-bulk.html[`bulk` API] to do the actual deletion. @@ -231,8 +231,8 @@ try-once:: syntactic sugar:: - A delete-by-query is equivalent to a scan/scroll search and corresponding - bulk-deletes by ID. + A delete-by-query is equivalent to a scroll search ordered by `_doc` and + corresponding bulk-deletes by ID. point-in-time:: @@ -267,4 +267,4 @@ move the functionality to a plugin instead of replacing the feautre in core: * There is currently no way to monitor or cancel a running delete-by-query request, except for the `timeout` parameter. -We have plans to solve both of these issues in a later version of Elasticsearch. 
\ No newline at end of file +We have plans to solve both of these issues in a later version of Elasticsearch. diff --git a/docs/reference/migration/index.asciidoc b/docs/reference/migration/index.asciidoc index ebfad2ada58..19a3e1f622b 100644 --- a/docs/reference/migration/index.asciidoc +++ b/docs/reference/migration/index.asciidoc @@ -16,6 +16,7 @@ As a general rule: See <> for more info. -- +include::migrate_2_1.asciidoc[] include::migrate_2_0.asciidoc[] diff --git a/docs/reference/migration/migrate_2_0/removals.asciidoc b/docs/reference/migration/migrate_2_0/removals.asciidoc index afdc109244c..f02bf3a3f7f 100644 --- a/docs/reference/migration/migrate_2_0/removals.asciidoc +++ b/docs/reference/migration/migrate_2_0/removals.asciidoc @@ -27,8 +27,8 @@ The old delete-by-query functionality was fast but unsafe. It could lead to document differences between the primary and replica shards, and could even produce out of memory exceptions and cause the cluster to crash. -This feature has been reimplemented using the <> and -the <> API, which may be slower for queries which match +This feature has been reimplemented using the <> and +<> APIs, which may be slower for queries which match large numbers of documents, but is safe. Currently, a long running delete-by-query job cannot be cancelled, which is diff --git a/docs/reference/migration/migrate_2_1.asciidoc b/docs/reference/migration/migrate_2_1.asciidoc new file mode 100644 index 00000000000..7542fb3d1df --- /dev/null +++ b/docs/reference/migration/migrate_2_1.asciidoc @@ -0,0 +1,27 @@ +[[breaking-changes-2.1]] +== Breaking changes in 2.1 + +This section discusses the changes that you need to be aware of when migrating +your application to Elasticsearch 2.1. + +=== Search changes + +==== `search_type=scan` deprecated + +The `scan` search type has been deprecated. 
All benefits from this search +type can now be achieved by doing a scroll request that sorts documents in +`_doc` order, for instance: + +[source,sh] +--------------- +GET /my_index/_search?scroll=2m +{ + "sort": [ + "_doc" + ] +} +--------------- + +Scroll requests sorted by `_doc` have been optimized to more efficiently resume +from where the previous request stopped, so this will have the same performance +characteristics as the former `scan` search type. diff --git a/docs/reference/search/request-body.asciidoc b/docs/reference/search/request-body.asciidoc index 6c7d127bc0c..1469073b2c7 100644 --- a/docs/reference/search/request-body.asciidoc +++ b/docs/reference/search/request-body.asciidoc @@ -64,7 +64,7 @@ And here is a sample response: `search_type`:: The type of the search operation to perform. Can be - `dfs_query_then_fetch`, `query_then_fetch`, or 'scan'. + `dfs_query_then_fetch` or `query_then_fetch`. Defaults to `query_then_fetch`. See <> for more. diff --git a/docs/reference/search/request/scroll.asciidoc b/docs/reference/search/request/scroll.asciidoc index 6abdd27220f..2ad1f57388e 100644 --- a/docs/reference/search/request/scroll.asciidoc +++ b/docs/reference/search/request/scroll.asciidoc @@ -90,59 +90,20 @@ used. NOTE: If the request specifies aggregations, only the initial search response will contain the aggregations results. -[[scroll-scan]] -==== Efficient scrolling with Scroll-Scan - -Deep pagination with <> -- e.g. -`?size=10&from=10000` -- is very inefficient as (in this example) 100,000 -sorted results have to be retrieved from each shard and resorted in order to -return just 10 results. This process has to be repeated for every page -requested. - -The `scroll` API keeps track of which results have already been returned and -so is able to return sorted results more efficiently than with deep -pagination. However, sorting results (which happens by default) still has a -cost. 
- -Normally, you just want to retrieve all results and the order doesn't matter. -Scrolling can be combined with the <> search type to disable -any scoring or sorting and to return results in the most efficient way -possible. All that is needed is to add `search_type=scan` to the query string -of the initial search request: +NOTE: Scroll requests have optimizations that make them faster when the sort +order is `_doc`. If you want to iterate over all documents regardless of the +order, this is the most efficient option: [source,js] -------------------------------------------------- -curl 'localhost:9200/twitter/tweet/_search?scroll=1m&search_type=scan' <1> -d ' +curl -XGET 'localhost:9200/_search?scroll=1m' -d ' { - "query": { - "match" : { - "title" : "elasticsearch" - } - } + "sort": [ + "_doc" + ] } ' -------------------------------------------------- -<1> Setting `search_type` to `scan` disables sorting and makes scrolling - very efficient. - -A scanning scroll request differs from a standard scroll request in four -ways: - -* No score is calculated and sorting is disabled. Results are returned in - the order they appear in the index. - -* Aggregations are not supported. - -* The response of the initial `search` request will not contain any results in - the `hits` array. The first results will be returned by the first `scroll` - request. - -* The <> controls the number of - results *per shard*, not per request, so a `size` of `10` which hits 5 - shards will return a maximum of 50 results per `scroll` request. - -If you want the scoring to happen, even without sorting on it, set the -`track_scores` parameter to `true`. 
[[scroll-search-context]] ==== Keeping the search context alive diff --git a/docs/reference/search/request/search-type.asciidoc b/docs/reference/search/request/search-type.asciidoc index 6ad75dc5245..ab2d9164988 100644 --- a/docs/reference/search/request/search-type.asciidoc +++ b/docs/reference/search/request/search-type.asciidoc @@ -26,8 +26,8 @@ each shard using these global frequencies. Also, because of the need to sort the results, getting back a large document set, or even scrolling it, while maintaining the correct sorting behavior can be a very expensive operation. For large result set -scrolling without sorting, the `scan` search type (explained below) is -also available. +scrolling, it is best to sort by `_doc` if the order in which documents +are returned is not important. Elasticsearch is very flexible and allows to control the type of search to execute on a *per search request* basis. The type can be configured @@ -77,9 +77,11 @@ API as it provides more options. [[scan]] ==== Scan +deprecated[2.1.0, `scan` does not provide any benefits over a regular `scroll` request sorted by `_doc`] + Parameter value: *scan*. The `scan` search type disables sorting in order to allow very efficient -scrolling through large result sets. See <> for more. +scrolling through large result sets. diff --git a/docs/reference/search/uri-request.asciidoc b/docs/reference/search/uri-request.asciidoc index 646dfa54c67..e01f9e9d2c7 100644 --- a/docs/reference/search/uri-request.asciidoc +++ b/docs/reference/search/uri-request.asciidoc @@ -103,7 +103,9 @@ Defaults to no terminate_after. |`size` |The number of hits to return. Defaults to `10`. |`search_type` |The type of the search operation to perform. Can be -`dfs_query_then_fetch`, `query_then_fetch`, `scan` or `count` +`dfs_query_then_fetch`, `query_then_fetch`, `scan` +deprecated[2.1.0,Replaced by a regular `scroll` sorted by `_doc`] +or `count` deprecated[2.0.0-beta1,Replaced by `size: 0`]. Defaults to `query_then_fetch`. 
See <> for more details on the different types of search that can be performed. diff --git a/plugins/delete-by-query/src/main/java/org/elasticsearch/action/deletebyquery/TransportDeleteByQueryAction.java b/plugins/delete-by-query/src/main/java/org/elasticsearch/action/deletebyquery/TransportDeleteByQueryAction.java index 602b0a4cabe..de8d3c84f1a 100644 --- a/plugins/delete-by-query/src/main/java/org/elasticsearch/action/deletebyquery/TransportDeleteByQueryAction.java +++ b/plugins/delete-by-query/src/main/java/org/elasticsearch/action/deletebyquery/TransportDeleteByQueryAction.java @@ -102,12 +102,17 @@ public class TransportDeleteByQueryAction extends HandledTransportAction 0) { source.size(request.size()); } @@ -121,17 +126,9 @@ public class TransportDeleteByQueryAction extends HandledTransportAction() { @Override public void onResponse(SearchResponse scrollResponse) { - final SearchHit[] docs = scrollResponse.getHits().getHits(); - final String nextScrollId = scrollResponse.getScrollId(); - addShardFailures(scrollResponse.getShardFailures()); - - if (logger.isTraceEnabled()) { - logger.trace("scroll request [{}] executed: [{}] document(s) returned", scrollId, docs.length); - } - - if ((docs.length == 0) || (nextScrollId == null)) { - logger.trace("scrolling documents terminated"); - finishHim(scrollId, false, null); - return; - } - - if (hasTimedOut()) { - logger.trace("scrolling documents timed out"); - finishHim(scrollId, true, null); - return; - } - - // Delete the scrolled documents using the Bulk API - BulkRequest bulkRequest = new BulkRequest(); - for (SearchHit doc : docs) { - DeleteRequest delete = new DeleteRequest(doc.index(), doc.type(), doc.id()).version(doc.version()); - SearchHitField routing = doc.field("_routing"); - if (routing != null) { - delete.routing((String) routing.value()); - } - SearchHitField parent = doc.field("_parent"); - if (parent != null) { - delete.parent((String) parent.value()); - } - bulkRequest.add(delete); - } - - 
logger.trace("executing bulk request with [{}] deletions", bulkRequest.numberOfActions()); - client.bulk(bulkRequest, new ActionListener() { - @Override - public void onResponse(BulkResponse bulkResponse) { - onBulkResponse(nextScrollId, bulkResponse); - } - - @Override - public void onFailure(Throwable e) { - onBulkFailure(nextScrollId, docs, e); - } - }); + deleteHits(scrollId, scrollResponse); } @Override @@ -212,6 +163,56 @@ public class TransportDeleteByQueryAction extends HandledTransportAction() { + @Override + public void onResponse(BulkResponse bulkResponse) { + onBulkResponse(nextScrollId, bulkResponse); + } + + @Override + public void onFailure(Throwable e) { + onBulkFailure(nextScrollId, docs, e); + } + }); + } + void onBulkResponse(String scrollId, BulkResponse bulkResponse) { try { for (BulkItemResponse item : bulkResponse.getItems()) {