Deprecate the `scan` search type.
This commit deprecates the `scan` search type in favour of regular scroll requests sorted by `_doc`. Related to #12983
This commit is contained in:
parent
833f821171
commit
6fa258b8fa
|
@ -54,7 +54,9 @@ public enum SearchType {
|
||||||
/**
|
/**
|
||||||
* Performs scanning of the results which executes the search without any sorting.
|
* Performs scanning of the results which executes the search without any sorting.
|
||||||
* It will automatically start scrolling the result set.
|
* It will automatically start scrolling the result set.
|
||||||
|
* @deprecated will be removed in 3.0; use a regular scroll request sorted by {@code _doc} instead
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
SCAN((byte) 4),
|
SCAN((byte) 4),
|
||||||
/**
|
/**
|
||||||
* Only counts the results, will still execute aggregations and the like.
|
* Only counts the results, will still execute aggregations and the like.
|
||||||
|
@ -69,6 +71,7 @@ public enum SearchType {
|
||||||
public static final SearchType DEFAULT = QUERY_THEN_FETCH;
|
public static final SearchType DEFAULT = QUERY_THEN_FETCH;
|
||||||
|
|
||||||
private static final ParseField COUNT_VALUE = new ParseField("count").withAllDeprecated("query_then_fetch");
|
private static final ParseField COUNT_VALUE = new ParseField("count").withAllDeprecated("query_then_fetch");
|
||||||
|
private static final ParseField SCAN_VALUE = new ParseField("scan").withAllDeprecated("query_then_fetch sorting on `_doc`");
|
||||||
|
|
||||||
private byte id;
|
private byte id;
|
||||||
|
|
||||||
|
@ -121,7 +124,7 @@ public enum SearchType {
|
||||||
return SearchType.QUERY_THEN_FETCH;
|
return SearchType.QUERY_THEN_FETCH;
|
||||||
} else if ("query_and_fetch".equals(searchType)) {
|
} else if ("query_and_fetch".equals(searchType)) {
|
||||||
return SearchType.QUERY_AND_FETCH;
|
return SearchType.QUERY_AND_FETCH;
|
||||||
} else if ("scan".equals(searchType)) {
|
} else if (parseFieldMatcher.match(searchType, SCAN_VALUE)) {
|
||||||
return SearchType.SCAN;
|
return SearchType.SCAN;
|
||||||
} else if (parseFieldMatcher.match(searchType, COUNT_VALUE)) {
|
} else if (parseFieldMatcher.match(searchType, COUNT_VALUE)) {
|
||||||
return SearchType.COUNT;
|
return SearchType.COUNT;
|
||||||
|
|
|
@ -40,6 +40,7 @@ import org.elasticsearch.threadpool.ThreadPool;
|
||||||
|
|
||||||
import static org.elasticsearch.action.search.type.TransportSearchHelper.buildScrollId;
|
import static org.elasticsearch.action.search.type.TransportSearchHelper.buildScrollId;
|
||||||
|
|
||||||
|
@Deprecated // remove in 3.0
|
||||||
public class TransportSearchScanAction extends TransportSearchTypeAction {
|
public class TransportSearchScanAction extends TransportSearchTypeAction {
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
|
|
|
@ -263,6 +263,7 @@ public class SearchService extends AbstractLifecycleComponent<SearchService> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Deprecated // remove in 3.0
|
||||||
public QuerySearchResult executeScan(ShardSearchRequest request) {
|
public QuerySearchResult executeScan(ShardSearchRequest request) {
|
||||||
final SearchContext context = createAndPutContext(request);
|
final SearchContext context = createAndPutContext(request);
|
||||||
final int originalSize = context.size();
|
final int originalSize = context.size();
|
||||||
|
|
|
@ -418,6 +418,7 @@ public class SearchServiceTransportAction extends AbstractComponent {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Deprecated // remove in 3.0
|
||||||
class SearchScanTransportHandler implements TransportRequestHandler<ShardSearchTransportRequest> {
|
class SearchScanTransportHandler implements TransportRequestHandler<ShardSearchTransportRequest> {
|
||||||
@Override
|
@Override
|
||||||
public void messageReceived(ShardSearchTransportRequest request, TransportChannel channel) throws Exception {
|
public void messageReceived(ShardSearchTransportRequest request, TransportChannel channel) throws Exception {
|
||||||
|
|
|
@ -6,7 +6,7 @@ The delete-by-query plugin adds support for deleting all of the documents
|
||||||
replacement for the problematic _delete-by-query_ functionality which has been
|
replacement for the problematic _delete-by-query_ functionality which has been
|
||||||
removed from Elasticsearch core.
|
removed from Elasticsearch core.
|
||||||
|
|
||||||
Internally, it uses the {ref}/search-request-scroll.html#scroll-scan[Scan/Scroll]
|
Internally, it uses {ref}/search-request-scroll.html[Scroll]
|
||||||
and {ref}/docs-bulk.html[Bulk] APIs to delete documents in an efficient and
|
and {ref}/docs-bulk.html[Bulk] APIs to delete documents in an efficient and
|
||||||
safe manner. It is slower than the old _delete-by-query_ functionality, but
|
safe manner. It is slower than the old _delete-by-query_ functionality, but
|
||||||
fixes the problems with the previous implementation.
|
fixes the problems with the previous implementation.
|
||||||
|
@ -101,7 +101,7 @@ See {ref}/search-uri-request.html[URI search request] for details.
|
||||||
|
|
||||||
`size`::
|
`size`::
|
||||||
|
|
||||||
The number of hits returned *per shard* by the {ref}/search-request-scroll.html#scroll-scan[scan]
|
The number of hits returned by the {ref}/search-request-scroll.html[scroll]
|
||||||
request. Defaults to 10. May also be specified in the request body.
|
request. Defaults to 10. May also be specified in the request body.
|
||||||
|
|
||||||
`timeout`::
|
`timeout`::
|
||||||
|
@ -148,7 +148,7 @@ The JSON response looks like this:
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
Internally, the query is used to execute an initial
|
Internally, the query is used to execute an initial
|
||||||
{ref}/search-request-scroll.html#scroll-scan[scroll/scan] request. As hits are
|
{ref}/search-request-scroll.html[scroll] request. As hits are
|
||||||
pulled from the scroll API, they are passed to the {ref}/docs-bulk.html[Bulk
|
pulled from the scroll API, they are passed to the {ref}/docs-bulk.html[Bulk
|
||||||
API] for deletion.
|
API] for deletion.
|
||||||
|
|
||||||
|
@ -157,7 +157,7 @@ was visible to search at the time the request was executed. Any documents
|
||||||
that have been reindexed or updated during execution will not be deleted.
|
that have been reindexed or updated during execution will not be deleted.
|
||||||
|
|
||||||
Since documents can be updated or deleted by external operations during the
|
Since documents can be updated or deleted by external operations during the
|
||||||
_scan-scroll-bulk_ process, the plugin keeps track of different counters for
|
_scroll-bulk_ process, the plugin keeps track of different counters for
|
||||||
each index, with the totals displayed under the `_all` index. The counters
|
each index, with the totals displayed under the `_all` index. The counters
|
||||||
are as follows:
|
are as follows:
|
||||||
|
|
||||||
|
@ -212,7 +212,7 @@ Resiliency::
|
||||||
=== New delete-by-query implementation
|
=== New delete-by-query implementation
|
||||||
|
|
||||||
The new implementation, provided by this plugin, is built internally
|
The new implementation, provided by this plugin, is built internally
|
||||||
using {ref}/search-request-scroll.html#scroll-scan[scan and scroll] to return
|
using {ref}/search-request-scroll.html[scroll] to return
|
||||||
the document IDs and versions of all the documents that need to be deleted.
|
the document IDs and versions of all the documents that need to be deleted.
|
||||||
It then uses the {ref}/docs-bulk.html[`bulk` API] to do the actual deletion.
|
It then uses the {ref}/docs-bulk.html[`bulk` API] to do the actual deletion.
|
||||||
|
|
||||||
|
@ -231,8 +231,8 @@ try-once::
|
||||||
|
|
||||||
syntactic sugar::
|
syntactic sugar::
|
||||||
|
|
||||||
A delete-by-query is equivalent to a scan/scroll search and corresponding
|
A delete-by-query is equivalent to a scroll search ordered by `_doc` and
|
||||||
bulk-deletes by ID.
|
corresponding bulk-deletes by ID.
|
||||||
|
|
||||||
point-in-time::
|
point-in-time::
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@ As a general rule:
|
||||||
|
|
||||||
See <<setup-upgrade>> for more info.
|
See <<setup-upgrade>> for more info.
|
||||||
--
|
--
|
||||||
|
include::migrate_2_1.asciidoc[]
|
||||||
|
|
||||||
include::migrate_2_0.asciidoc[]
|
include::migrate_2_0.asciidoc[]
|
||||||
|
|
||||||
|
|
|
@ -27,8 +27,8 @@ The old delete-by-query functionality was fast but unsafe. It could lead to
|
||||||
document differences between the primary and replica shards, and could even
|
document differences between the primary and replica shards, and could even
|
||||||
produce out of memory exceptions and cause the cluster to crash.
|
produce out of memory exceptions and cause the cluster to crash.
|
||||||
|
|
||||||
This feature has been reimplemented using the <<scroll-scan,scroll/scan>> and
|
This feature has been reimplemented using the <<search-request-scroll,scroll>> and
|
||||||
the <<docs-bulk,`bulk`>> API, which may be slower for queries which match
|
<<docs-bulk,`bulk`>> APIs, which may be slower for queries which match
|
||||||
large numbers of documents, but is safe.
|
large numbers of documents, but is safe.
|
||||||
|
|
||||||
Currently, a long running delete-by-query job cannot be cancelled, which is
|
Currently, a long running delete-by-query job cannot be cancelled, which is
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
[[breaking-changes-2.1]]
|
||||||
|
== Breaking changes in 2.1
|
||||||
|
|
||||||
|
This section discusses the changes that you need to be aware of when migrating
|
||||||
|
your application to Elasticsearch 2.1.
|
||||||
|
|
||||||
|
=== Search changes
|
||||||
|
|
||||||
|
==== `search_type=scan` deprecated
|
||||||
|
|
||||||
|
The `scan` search type has been deprecated. All benefits from this search
|
||||||
|
type can now be achieved by doing a scroll request that sorts documents in
|
||||||
|
`_doc` order, for instance:
|
||||||
|
|
||||||
|
[source,sh]
|
||||||
|
---------------
|
||||||
|
GET /my_index/_search?scroll=2m
|
||||||
|
{
|
||||||
|
"sort": [
|
||||||
|
"_doc"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
---------------
|
||||||
|
|
||||||
|
Scroll requests sorted by `_doc` have been optimized to more efficiently resume
|
||||||
|
from where the previous request stopped, so this will have the same performance
|
||||||
|
characteristics as the former `scan` search type.
|
|
@ -64,7 +64,7 @@ And here is a sample response:
|
||||||
`search_type`::
|
`search_type`::
|
||||||
|
|
||||||
The type of the search operation to perform. Can be
|
The type of the search operation to perform. Can be
|
||||||
`dfs_query_then_fetch`, `query_then_fetch`, or 'scan'.
|
`dfs_query_then_fetch` or `query_then_fetch`.
|
||||||
Defaults to `query_then_fetch`.
|
Defaults to `query_then_fetch`.
|
||||||
See <<search-request-search-type,_Search Type_>> for more.
|
See <<search-request-search-type,_Search Type_>> for more.
|
||||||
|
|
||||||
|
|
|
@ -90,59 +90,20 @@ used.
|
||||||
NOTE: If the request specifies aggregations, only the initial search response
|
NOTE: If the request specifies aggregations, only the initial search response
|
||||||
will contain the aggregations results.
|
will contain the aggregations results.
|
||||||
|
|
||||||
[[scroll-scan]]
|
NOTE: Scroll requests have optimizations that make them faster when the sort
|
||||||
==== Efficient scrolling with Scroll-Scan
|
order is `_doc`. If you want to iterate over all documents regardless of the
|
||||||
|
order, this is the most efficient option:
|
||||||
Deep pagination with <<search-request-from-size,`from` and `size`>> -- e.g.
|
|
||||||
`?size=10&from=10000` -- is very inefficient as (in this example) 100,000
|
|
||||||
sorted results have to be retrieved from each shard and resorted in order to
|
|
||||||
return just 10 results. This process has to be repeated for every page
|
|
||||||
requested.
|
|
||||||
|
|
||||||
The `scroll` API keeps track of which results have already been returned and
|
|
||||||
so is able to return sorted results more efficiently than with deep
|
|
||||||
pagination. However, sorting results (which happens by default) still has a
|
|
||||||
cost.
|
|
||||||
|
|
||||||
Normally, you just want to retrieve all results and the order doesn't matter.
|
|
||||||
Scrolling can be combined with the <<scan,`scan`>> search type to disable
|
|
||||||
any scoring or sorting and to return results in the most efficient way
|
|
||||||
possible. All that is needed is to add `search_type=scan` to the query string
|
|
||||||
of the initial search request:
|
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
curl 'localhost:9200/twitter/tweet/_search?scroll=1m&search_type=scan' <1> -d '
|
curl -XGET 'localhost:9200/_search?scroll=1m' -d '
|
||||||
{
|
{
|
||||||
"query": {
|
"sort": [
|
||||||
"match" : {
|
"_doc"
|
||||||
"title" : "elasticsearch"
|
|
||||||
}
|
|
||||||
}
|
]
|
||||||
}
|
}
|
||||||
'
|
'
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
<1> Setting `search_type` to `scan` disables sorting and makes scrolling
|
|
||||||
very efficient.
|
|
||||||
|
|
||||||
A scanning scroll request differs from a standard scroll request in four
|
|
||||||
ways:
|
|
||||||
|
|
||||||
* No score is calculated and sorting is disabled. Results are returned in
|
|
||||||
the order they appear in the index.
|
|
||||||
|
|
||||||
* Aggregations are not supported.
|
|
||||||
|
|
||||||
* The response of the initial `search` request will not contain any results in
|
|
||||||
the `hits` array. The first results will be returned by the first `scroll`
|
|
||||||
request.
|
|
||||||
|
|
||||||
* The <<search-request-from-size,`size` parameter>> controls the number of
|
|
||||||
results *per shard*, not per request, so a `size` of `10` which hits 5
|
|
||||||
shards will return a maximum of 50 results per `scroll` request.
|
|
||||||
|
|
||||||
If you want the scoring to happen, even without sorting on it, set the
|
|
||||||
`track_scores` parameter to `true`.
|
|
||||||
|
|
||||||
[[scroll-search-context]]
|
[[scroll-search-context]]
|
||||||
==== Keeping the search context alive
|
==== Keeping the search context alive
|
||||||
|
|
|
@ -26,8 +26,8 @@ each shard using these global frequencies.
|
||||||
Also, because of the need to sort the results, getting back a large
|
Also, because of the need to sort the results, getting back a large
|
||||||
document set, or even scrolling it, while maintaining the correct sorting
|
document set, or even scrolling it, while maintaining the correct sorting
|
||||||
behavior can be a very expensive operation. For large result set
|
behavior can be a very expensive operation. For large result set
|
||||||
scrolling without sorting, the `scan` search type (explained below) is
|
scrolling, it is best to sort by `_doc` if the order in which documents
|
||||||
also available.
|
are returned is not important.
|
||||||
|
|
||||||
Elasticsearch is very flexible and allows you to control the type of search
|
Elasticsearch is very flexible and allows you to control the type of search
|
||||||
to execute on a *per search request* basis. The type can be configured
|
to execute on a *per search request* basis. The type can be configured
|
||||||
|
@ -77,9 +77,11 @@ API as it provides more options.
|
||||||
[[scan]]
|
[[scan]]
|
||||||
==== Scan
|
==== Scan
|
||||||
|
|
||||||
|
deprecated[2.1.0, `scan` does not provide any benefits over a regular `scroll` request sorted by `_doc`]
|
||||||
|
|
||||||
Parameter value: *scan*.
|
Parameter value: *scan*.
|
||||||
|
|
||||||
The `scan` search type disables sorting in order to allow very efficient
|
The `scan` search type disables sorting in order to allow very efficient
|
||||||
scrolling through large result sets. See <<scroll-scan>> for more.
|
scrolling through large result sets.
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -103,7 +103,9 @@ Defaults to no terminate_after.
|
||||||
|`size` |The number of hits to return. Defaults to `10`.
|
|`size` |The number of hits to return. Defaults to `10`.
|
||||||
|
|
||||||
|`search_type` |The type of the search operation to perform. Can be
|
|`search_type` |The type of the search operation to perform. Can be
|
||||||
`dfs_query_then_fetch`, `query_then_fetch`, `scan` or `count`
|
`dfs_query_then_fetch`, `query_then_fetch`, `scan`
|
||||||
|
deprecated[2.1.0,Replaced by a regular `scroll` sorted by `_doc`]
|
||||||
|
or `count`
|
||||||
deprecated[2.0.0-beta1,Replaced by `size: 0`]. Defaults to `query_then_fetch`. See
|
deprecated[2.0.0-beta1,Replaced by `size: 0`]. Defaults to `query_then_fetch`. See
|
||||||
<<search-request-search-type,_Search Type_>> for
|
<<search-request-search-type,_Search Type_>> for
|
||||||
more details on the different types of search that can be performed.
|
more details on the different types of search that can be performed.
|
||||||
|
|
|
@ -102,12 +102,17 @@ public class TransportDeleteByQueryAction extends HandledTransportAction<DeleteB
|
||||||
void executeScan() {
|
void executeScan() {
|
||||||
try {
|
try {
|
||||||
final SearchRequest scanRequest = new SearchRequest(request.indices()).types(request.types()).indicesOptions(request.indicesOptions());
|
final SearchRequest scanRequest = new SearchRequest(request.indices()).types(request.types()).indicesOptions(request.indicesOptions());
|
||||||
scanRequest.searchType(SearchType.SCAN).scroll(request.scroll());
|
scanRequest.scroll(request.scroll());
|
||||||
if (request.routing() != null) {
|
if (request.routing() != null) {
|
||||||
scanRequest.routing(request.routing());
|
scanRequest.routing(request.routing());
|
||||||
}
|
}
|
||||||
|
|
||||||
SearchSourceBuilder source = new SearchSourceBuilder().query(request.source()).fields("_routing", "_parent").fetchSource(false).version(true);
|
SearchSourceBuilder source = new SearchSourceBuilder()
|
||||||
|
.query(request.source())
|
||||||
|
.fields("_routing", "_parent")
|
||||||
|
.sort("_doc") // important for performance
|
||||||
|
.fetchSource(false)
|
||||||
|
.version(true);
|
||||||
if (request.size() > 0) {
|
if (request.size() > 0) {
|
||||||
source.size(request.size());
|
source.size(request.size());
|
||||||
}
|
}
|
||||||
|
@ -121,17 +126,9 @@ public class TransportDeleteByQueryAction extends HandledTransportAction<DeleteB
|
||||||
@Override
|
@Override
|
||||||
public void onResponse(SearchResponse searchResponse) {
|
public void onResponse(SearchResponse searchResponse) {
|
||||||
long hits = searchResponse.getHits().getTotalHits();
|
long hits = searchResponse.getHits().getTotalHits();
|
||||||
logger.trace("scan request executed: found [{}] document(s) to delete", hits);
|
logger.trace("first request executed: found [{}] document(s) to delete", hits);
|
||||||
addShardFailures(searchResponse.getShardFailures());
|
|
||||||
|
|
||||||
if (hits == 0) {
|
|
||||||
finishHim(searchResponse.getScrollId(), false, null);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
total.set(hits);
|
total.set(hits);
|
||||||
|
deleteHits(null, searchResponse);
|
||||||
logger.trace("start scrolling [{}] document(s)", hits);
|
|
||||||
executeScroll(searchResponse.getScrollId());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -151,6 +148,22 @@ public class TransportDeleteByQueryAction extends HandledTransportAction<DeleteB
|
||||||
scrollAction.execute(new SearchScrollRequest(scrollId).scroll(request.scroll()), new ActionListener<SearchResponse>() {
|
scrollAction.execute(new SearchScrollRequest(scrollId).scroll(request.scroll()), new ActionListener<SearchResponse>() {
|
||||||
@Override
|
@Override
|
||||||
public void onResponse(SearchResponse scrollResponse) {
|
public void onResponse(SearchResponse scrollResponse) {
|
||||||
|
deleteHits(scrollId, scrollResponse);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onFailure(Throwable e) {
|
||||||
|
logger.error("scroll request [{}] failed, scrolling document(s) is stopped", e, scrollId);
|
||||||
|
finishHim(scrollId, hasTimedOut(), e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} catch (Throwable t) {
|
||||||
|
logger.error("unable to execute scroll request [{}]", t, scrollId);
|
||||||
|
finishHim(scrollId, false, t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void deleteHits(String scrollId, SearchResponse scrollResponse) {
|
||||||
final SearchHit[] docs = scrollResponse.getHits().getHits();
|
final SearchHit[] docs = scrollResponse.getHits().getHits();
|
||||||
final String nextScrollId = scrollResponse.getScrollId();
|
final String nextScrollId = scrollResponse.getScrollId();
|
||||||
addShardFailures(scrollResponse.getShardFailures());
|
addShardFailures(scrollResponse.getShardFailures());
|
||||||
|
@ -200,18 +213,6 @@ public class TransportDeleteByQueryAction extends HandledTransportAction<DeleteB
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void onFailure(Throwable e) {
|
|
||||||
logger.error("scroll request [{}] failed, scrolling document(s) is stopped", e, scrollId);
|
|
||||||
finishHim(scrollId, hasTimedOut(), e);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
} catch (Throwable t) {
|
|
||||||
logger.error("unable to execute scroll request [{}]", t, scrollId);
|
|
||||||
finishHim(scrollId, false, t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void onBulkResponse(String scrollId, BulkResponse bulkResponse) {
|
void onBulkResponse(String scrollId, BulkResponse bulkResponse) {
|
||||||
try {
|
try {
|
||||||
for (BulkItemResponse item : bulkResponse.getItems()) {
|
for (BulkItemResponse item : bulkResponse.getItems()) {
|
||||||
|
|
Loading…
Reference in New Issue