Merge pull request #12994 from jpountz/deprecate/scan

Deprecate the `scan` search type.
2015-08-20 12:47:45 +02:00 · 2015-08-20 12:47:45 +02:00 · 41d8fbe8f5
parent 833f821171 6fa258b8fa
commit 41d8fbe8f5
13 changed files with 121 additions and 121 deletions
--- a/core/src/main/java/org/elasticsearch/action/search/SearchType.java
+++ b/core/src/main/java/org/elasticsearch/action/search/SearchType.java
@ -54,7 +54,9 @@ public enum SearchType {
    /**
     * Performs scanning of the results which executes the search without any sorting.
     * It will automatically start scrolling the result set.
     * @deprecated will be removed in 3.0, you should do a regular scroll instead, ordered by `_doc`
     */
    @Deprecated
    SCAN((byte) 4),
    /**
     * Only counts the results, will still execute aggregations and the like.
@ -69,6 +71,7 @@ public enum SearchType {
    public static final SearchType DEFAULT = QUERY_THEN_FETCH;
    private static final ParseField COUNT_VALUE = new ParseField("count").withAllDeprecated("query_then_fetch");
    private static final ParseField SCAN_VALUE = new ParseField("scan").withAllDeprecated("query_then_fetch sorting on `_doc`");
    private byte id;
@ -121,7 +124,7 @@ public enum SearchType {
            return SearchType.QUERY_THEN_FETCH;
        } else if ("query_and_fetch".equals(searchType)) {
            return SearchType.QUERY_AND_FETCH;
-        } else if ("scan".equals(searchType)) {
+        } else if (parseFieldMatcher.match(searchType, SCAN_VALUE)) {
            return SearchType.SCAN;
        } else if (parseFieldMatcher.match(searchType, COUNT_VALUE)) {
            return SearchType.COUNT;
--- a/core/src/main/java/org/elasticsearch/action/search/type/TransportSearchScanAction.java
+++ b/core/src/main/java/org/elasticsearch/action/search/type/TransportSearchScanAction.java
@ -40,6 +40,7 @@ import org.elasticsearch.threadpool.ThreadPool;
 import static org.elasticsearch.action.search.type.TransportSearchHelper.buildScrollId;
@Deprecated // remove in 3.0
 public class TransportSearchScanAction extends TransportSearchTypeAction {
    @Inject
--- a/core/src/main/java/org/elasticsearch/search/SearchService.java
+++ b/core/src/main/java/org/elasticsearch/search/SearchService.java
@ -263,6 +263,7 @@ public class SearchService extends AbstractLifecycleComponent<SearchService> {
        }
    }
    @Deprecated // remove in 3.0
    public QuerySearchResult executeScan(ShardSearchRequest request) {
        final SearchContext context = createAndPutContext(request);
        final int originalSize = context.size();
--- a/core/src/main/java/org/elasticsearch/search/action/SearchServiceTransportAction.java
+++ b/core/src/main/java/org/elasticsearch/search/action/SearchServiceTransportAction.java
@ -418,6 +418,7 @@ public class SearchServiceTransportAction extends AbstractComponent {
        }
    }
    @Deprecated // remove in 3.0
    class SearchScanTransportHandler implements TransportRequestHandler<ShardSearchTransportRequest> {
        @Override
        public void messageReceived(ShardSearchTransportRequest request, TransportChannel channel) throws Exception {
--- a/docs/plugins/delete-by-query.asciidoc
+++ b/docs/plugins/delete-by-query.asciidoc
@ -6,7 +6,7 @@ The delete-by-query plugin adds support for deleting all of the documents
 replacement for the problematic _delete-by-query_ functionality which has been
 removed from Elasticsearch core.
-Internally, it uses the {ref}/search-request-scroll.html#scroll-scan[Scan/Scroll]
+Internally, it uses {ref}/search-request-scroll.html[Scroll]
 and {ref}/docs-bulk.html[Bulk] APIs to delete documents in an efficient and
 safe manner. It is slower than the old _delete-by-query_ functionality, but
 fixes the problems with the previous implementation.
@ -101,7 +101,7 @@ See {ref}/search-uri-request.html[URI search request] for details.
 `size`::
-The number of hits returned *per shard* by the {ref}/search-request-scroll.html#scroll-scan[scan]
+The number of hits returned by the {ref}/search-request-scroll.html[scroll]
 request.  Defaults to 10.  May also be specified in the request body.
 `timeout`::
@ -148,7 +148,7 @@ The JSON response looks like this:
 --------------------------------------------------
 Internally, the query is used to execute an initial
-{ref}/search-request-scroll.html#scroll-scan[scroll/scan] request. As hits are
+{ref}/search-request-scroll.html[scroll] request. As hits are
 pulled from the scroll API, they are passed to the {ref}/docs-bulk.html[Bulk
 API] for deletion.
@ -157,7 +157,7 @@ was visible to search at the time the request was executed.  Any documents
 that have been reindexed or updated during execution will not be deleted.
 Since documents can be updated or deleted by external operations during the
-_scan-scroll-bulk_ process, the plugin keeps track of different counters for
+_scroll-bulk_ process, the plugin keeps track of different counters for
 each index, with the totals displayed under the `_all` index.  The counters
 are as follows:
@ -212,7 +212,7 @@ Resiliency::
 === New delete-by-query implementation
 The new implementation, provided by this plugin, is built internally
-using  {ref}/search-request-scroll.html#scroll-scan[scan and scroll] to return
+using  {ref}/search-request-scroll.html[scroll] to return
 the document IDs and versions of all the documents that need to be deleted.
 It then uses  the {ref}/docs-bulk.html[`bulk` API] to do the actual deletion.
@ -231,8 +231,8 @@ try-once::
 syntactic sugar::
-    A delete-by-query is equivalent to a scan/scroll search and corresponding
+    A delete-by-query is equivalent to a scroll search ordered by `_doc` and
-    bulk-deletes by ID.
+    corresponding bulk-deletes by ID.
 point-in-time::
@ -267,4 +267,4 @ move the functionality to a plugin instead of replacing the feature in core:
 * There is currently no way to monitor or cancel a running delete-by-query
  request, except for the `timeout` parameter.
-We have plans to solve both of these issues in a later version of Elasticsearch.
+We have plans to solve both of these issues in a later version of Elasticsearch.
--- a/docs/reference/migration/index.asciidoc
+++ b/docs/reference/migration/index.asciidoc
@ -16,6 +16,7 @@ As a general rule:
 See <<setup-upgrade>> for more info.
 --
 include::migrate_2_1.asciidoc[]
 include::migrate_2_0.asciidoc[]
--- a/docs/reference/migration/migrate_2_0/removals.asciidoc
+++ b/docs/reference/migration/migrate_2_0/removals.asciidoc
@ -27,8 +27,8 @@ The old delete-by-query functionality was fast but unsafe.  It could lead to
 document differences between the primary and replica shards, and could even
 produce out of memory exceptions and cause the cluster to crash.
-This feature has been reimplemented using the <<scroll-scan,scroll/scan>> and
+This feature has been reimplemented using the <<search-request-scroll,scroll>> and
-the <<docs-bulk,`bulk`>> API, which may be slower for queries which match
+<<docs-bulk,`bulk`>> APIs, which may be slower for queries which match
 large numbers of documents, but is safe.
 Currently, a long running delete-by-query job cannot be cancelled, which is
--- a/docs/reference/migration/migrate_2_1.asciidoc
+++ b/docs/reference/migration/migrate_2_1.asciidoc
@ -0,0 +1,27 @@
 [[breaking-changes-2.1]]
 == Breaking changes in 2.1
 This section discusses the changes that you need to be aware of when migrating
 your application to Elasticsearch 2.1.
 === Search changes
 ==== `search_type=scan` deprecated
 The `scan` search type has been deprecated. All benefits from this search
 type can now be achieved by doing a scroll request that sorts documents in
 `_doc` order, for instance:
 [source,sh]
 ---------------
 GET /my_index/_search?scroll=2m
 {
  "sort": [
    "_doc"
  ]
 }
 ---------------
 Scroll requests sorted by `_doc` have been optimized to more efficiently resume
 from where the previous request stopped, so this will have the same performance
 characteristics as the former `scan` search type.
--- a/docs/reference/search/request-body.asciidoc
+++ b/docs/reference/search/request-body.asciidoc
@ -64,7 +64,7 @@ And here is a sample response:
 `search_type`::
    The type of the search operation to perform. Can be
-    `dfs_query_then_fetch`, `query_then_fetch`, or 'scan'.
+    `dfs_query_then_fetch` or `query_then_fetch`.
    Defaults to `query_then_fetch`.
    See <<search-request-search-type,_Search Type_>> for more.
--- a/docs/reference/search/request/scroll.asciidoc
+++ b/docs/reference/search/request/scroll.asciidoc
@ -90,59 +90,20 @@ used.
 NOTE: If the request specifies aggregations, only the initial search response
 will contain the aggregations results.
-[[scroll-scan]]
+NOTE: Scroll requests have optimizations that make them faster when the sort
-==== Efficient scrolling with Scroll-Scan
+order is `_doc`. If you want to iterate over all documents regardless of the
-
+order, this is the most efficient option:
 Deep pagination with <<search-request-from-size,`from` and `size`>> -- e.g.
 `?size=10&from=10000` -- is very inefficient as (in this example) 10,010
 sorted results have to be retrieved from each shard and resorted in order to
 return just 10 results.  This process has to be repeated for every page
 requested.
 The `scroll` API keeps track of which results have already been returned and
 so is able to return sorted results more efficiently than with deep
 pagination.  However, sorting results (which happens by default) still has a
 cost.
 Normally, you just want to retrieve all results and the order doesn't matter.
 Scrolling can be combined with the <<scan,`scan`>> search type to disable
 any scoring or sorting and to return results in the most efficient way
 possible.  All that is needed is to add `search_type=scan` to the query string
 of the initial search request:
 [source,js]
 --------------------------------------------------
-curl 'localhost:9200/twitter/tweet/_search?scroll=1m&search_type=scan' <1> -d '
+curl -XGET 'localhost:9200/_search?scroll=1m' -d '
 {
-    "query": {
+  "sort": [
-        "match" : {
+    "_doc"
-            "title" : "elasticsearch"
+  ]
        }
    }
 }
 '
 --------------------------------------------------
 <1> Setting `search_type` to `scan` disables sorting and makes scrolling
    very efficient.
 A scanning scroll request differs from a standard scroll request in four
 ways:
 * No score is calculated and sorting is disabled. Results are returned in
  the order they appear in the index.
 * Aggregations are not supported.
 * The response of the initial `search` request will not contain any results in
  the `hits` array. The first results will be returned by the first `scroll`
  request.
 * The <<search-request-from-size,`size` parameter>> controls the number of
  results *per shard*, not per request, so a `size` of `10` which hits 5
  shards will return a maximum of 50 results per `scroll` request.
 If you want the scoring to happen, even without sorting on it, set the
 `track_scores` parameter to `true`.
 [[scroll-search-context]]
 ==== Keeping the search context alive
--- a/docs/reference/search/request/search-type.asciidoc
+++ b/docs/reference/search/request/search-type.asciidoc
@ -26,8 +26,8 @@ each shard using these global frequencies.
 Also, because of the need to sort the results, getting back a large
 document set, or even scrolling it, while maintaining the correct sorting
 behavior can be a very expensive operation. For large result set
-scrolling without sorting, the `scan` search type (explained below) is
+scrolling, it is best to sort by `_doc` if the order in which documents
-also available.
+are returned is not important.
 Elasticsearch is very flexible and allows to control the type of search
 to execute on a *per search request* basis. The type can be configured
@ -77,9 +77,11 @@ API as it provides more options.
 [[scan]]
 ==== Scan
 deprecated[2.1.0, `scan` does not provide any benefits over a regular `scroll` request sorted by `_doc`]
 Parameter value: *scan*.
 The `scan` search type disables sorting in order to allow very efficient
-scrolling through large result sets.  See <<scroll-scan>> for more.
+scrolling through large result sets.
--- a/docs/reference/search/uri-request.asciidoc
+++ b/docs/reference/search/uri-request.asciidoc
@ -103,7 +103,9 @@ Defaults to no terminate_after.
 |`size` |The number of hits to return. Defaults to `10`.
 |`search_type` |The type of the search operation to perform. Can be
-`dfs_query_then_fetch`, `query_then_fetch`, `scan` or `count`
+`dfs_query_then_fetch`, `query_then_fetch`, `scan`
 deprecated[2.1.0,Replaced by a regular `scroll` sorted by `_doc`]
 or `count`
 deprecated[2.0.0-beta1,Replaced by `size: 0`]. Defaults to `query_then_fetch`. See
 <<search-request-search-type,_Search Type_>> for
 more details on the different types of search that can be performed.
--- a/plugins/delete-by-query/src/main/java/org/elasticsearch/action/deletebyquery/TransportDeleteByQueryAction.java
+++ b/plugins/delete-by-query/src/main/java/org/elasticsearch/action/deletebyquery/TransportDeleteByQueryAction.java
@ -102,12 +102,17 @@ public class TransportDeleteByQueryAction extends HandledTransportAction<DeleteB
        void executeScan() {
            try {
                final SearchRequest scanRequest = new SearchRequest(request.indices()).types(request.types()).indicesOptions(request.indicesOptions());
-                scanRequest.searchType(SearchType.SCAN).scroll(request.scroll());
+                scanRequest.scroll(request.scroll());
                if (request.routing() != null) {
                    scanRequest.routing(request.routing());
                }
-                SearchSourceBuilder source = new SearchSourceBuilder().query(request.source()).fields("_routing", "_parent").fetchSource(false).version(true);
+                SearchSourceBuilder source = new SearchSourceBuilder()
                        .query(request.source())
                        .fields("_routing", "_parent")
                        .sort("_doc") // important for performance
                        .fetchSource(false)
                        .version(true);
                if (request.size() > 0) {
                    source.size(request.size());
                }
@ -121,17 +126,9 @@ public class TransportDeleteByQueryAction extends HandledTransportAction<DeleteB
                    @Override
                    public void onResponse(SearchResponse searchResponse) {
                        long hits = searchResponse.getHits().getTotalHits();
-                        logger.trace("scan request executed: found [{}] document(s) to delete", hits);
+                        logger.trace("first request executed: found [{}] document(s) to delete", hits);
                        addShardFailures(searchResponse.getShardFailures());
                        if (hits == 0) {
                            finishHim(searchResponse.getScrollId(), false, null);
                            return;
                        }
                        total.set(hits);
-
+                        deleteHits(null, searchResponse);
                        logger.trace("start scrolling [{}] document(s)", hits);
                        executeScroll(searchResponse.getScrollId());
                    }
                    @Override
@ -151,53 +148,7 @@ public class TransportDeleteByQueryAction extends HandledTransportAction<DeleteB
                scrollAction.execute(new SearchScrollRequest(scrollId).scroll(request.scroll()), new ActionListener<SearchResponse>() {
                    @Override
                    public void onResponse(SearchResponse scrollResponse) {
-                        final SearchHit[] docs = scrollResponse.getHits().getHits();
+                        deleteHits(scrollId, scrollResponse);
                        final String nextScrollId = scrollResponse.getScrollId();
                        addShardFailures(scrollResponse.getShardFailures());
                        if (logger.isTraceEnabled()) {
                            logger.trace("scroll request [{}] executed: [{}] document(s) returned", scrollId, docs.length);
                        }
                        if ((docs.length == 0) || (nextScrollId == null)) {
                            logger.trace("scrolling documents terminated");
                            finishHim(scrollId, false, null);
                            return;
                        }
                        if (hasTimedOut()) {
                            logger.trace("scrolling documents timed out");
                            finishHim(scrollId, true, null);
                            return;
                        }
                        // Delete the scrolled documents using the Bulk API
                        BulkRequest bulkRequest = new BulkRequest();
                        for (SearchHit doc : docs) {
                            DeleteRequest delete = new DeleteRequest(doc.index(), doc.type(), doc.id()).version(doc.version());
                            SearchHitField routing = doc.field("_routing");
                            if (routing != null) {
                                delete.routing((String) routing.value());
                            }
                            SearchHitField parent = doc.field("_parent");
                            if (parent != null) {
                                delete.parent((String) parent.value());
                            }
                            bulkRequest.add(delete);
                        }
                        logger.trace("executing bulk request with [{}] deletions", bulkRequest.numberOfActions());
                        client.bulk(bulkRequest, new ActionListener<BulkResponse>() {
                            @Override
                            public void onResponse(BulkResponse bulkResponse) {
                                onBulkResponse(nextScrollId, bulkResponse);
                            }
                            @Override
                            public void onFailure(Throwable e) {
                                onBulkFailure(nextScrollId, docs, e);
                            }
                        });
                    }
                    @Override
@ -212,6 +163,56 @@ public class TransportDeleteByQueryAction extends HandledTransportAction<DeleteB
            }
        }
        /**
         * Handles one page of hits coming back from either the initial search request or a
         * follow-up scroll request: records the shard failures of the response, stops the
         * operation when the page is empty, no further scroll id is available or the request
         * has timed out, and otherwise issues a bulk request containing one versioned delete
         * per hit. The bulk callbacks are handed the scroll id of this response so that the
         * next page can be requested from there.
         *
         * @param scrollId       the scroll id that was used to obtain {@code scrollResponse};
         *                       {@code null} when the response comes from the initial search request
         * @param scrollResponse the search or scroll response whose hits must be deleted
         */
        void deleteHits(String scrollId, SearchResponse scrollResponse) {
            final SearchHit[] docs = scrollResponse.getHits().getHits();
            final String nextScrollId = scrollResponse.getScrollId();
            addShardFailures(scrollResponse.getShardFailures());

            if (logger.isTraceEnabled()) {
                logger.trace("scroll request [{}] executed: [{}] document(s) returned", scrollId, docs.length);
            }

            // On the initial search request there is no previous scroll id (scrollId == null), but the
            // response already carries one. Hand that one to finishHim() instead of null so the id is
            // not lost when we stop right after the first page.
            // NOTE(review): presumably finishHim() uses the id to release the scroll context - confirm.
            final String scrollIdToFinish = (scrollId != null) ? scrollId : nextScrollId;

            if ((docs.length == 0) || (nextScrollId == null)) {
                logger.trace("scrolling documents terminated");
                finishHim(scrollIdToFinish, false, null);
                return;
            }

            if (hasTimedOut()) {
                logger.trace("scrolling documents timed out");
                finishHim(scrollIdToFinish, true, null);
                return;
            }

            // Delete the scrolled documents using the Bulk API
            BulkRequest bulkRequest = new BulkRequest();
            for (SearchHit doc : docs) {
                // The hit's version is set on the delete request.
                DeleteRequest delete = new DeleteRequest(doc.index(), doc.type(), doc.id()).version(doc.version());
                // Carry over custom routing / parent when the hit exposes them.
                SearchHitField routing = doc.field("_routing");
                if (routing != null) {
                    delete.routing((String) routing.value());
                }
                SearchHitField parent = doc.field("_parent");
                if (parent != null) {
                    delete.parent((String) parent.value());
                }
                bulkRequest.add(delete);
            }

            logger.trace("executing bulk request with [{}] deletions", bulkRequest.numberOfActions());
            client.bulk(bulkRequest, new ActionListener<BulkResponse>() {
                @Override
                public void onResponse(BulkResponse bulkResponse) {
                    onBulkResponse(nextScrollId, bulkResponse);
                }
                @Override
                public void onFailure(Throwable e) {
                    onBulkFailure(nextScrollId, docs, e);
                }
            });
        }
        void onBulkResponse(String scrollId, BulkResponse bulkResponse) {
            try {
                for (BulkItemResponse item : bulkResponse.getItems()) {