Merge pull request #12994 from jpountz/deprecate/scan
Deprecate the `scan` search type.
This commit is contained in:
commit
41d8fbe8f5
|
@ -54,7 +54,9 @@ public enum SearchType {
|
|||
/**
|
||||
* Performs scanning of the results which executes the search without any sorting.
|
||||
* It will automatically start scrolling the result set.
|
||||
* @deprecated will be removed in 3.0, you should do a regular scroll instead, ordered by `_doc`
|
||||
*/
|
||||
@Deprecated
|
||||
SCAN((byte) 4),
|
||||
/**
|
||||
* Only counts the results, will still execute aggregations and the like.
|
||||
|
@ -69,6 +71,7 @@ public enum SearchType {
|
|||
public static final SearchType DEFAULT = QUERY_THEN_FETCH;
|
||||
|
||||
private static final ParseField COUNT_VALUE = new ParseField("count").withAllDeprecated("query_then_fetch");
|
||||
private static final ParseField SCAN_VALUE = new ParseField("scan").withAllDeprecated("query_then_fetch sorting on `_doc`");
|
||||
|
||||
private byte id;
|
||||
|
||||
|
@ -121,7 +124,7 @@ public enum SearchType {
|
|||
return SearchType.QUERY_THEN_FETCH;
|
||||
} else if ("query_and_fetch".equals(searchType)) {
|
||||
return SearchType.QUERY_AND_FETCH;
|
||||
} else if ("scan".equals(searchType)) {
|
||||
} else if (parseFieldMatcher.match(searchType, SCAN_VALUE)) {
|
||||
return SearchType.SCAN;
|
||||
} else if (parseFieldMatcher.match(searchType, COUNT_VALUE)) {
|
||||
return SearchType.COUNT;
|
||||
|
|
|
@ -40,6 +40,7 @@ import org.elasticsearch.threadpool.ThreadPool;
|
|||
|
||||
import static org.elasticsearch.action.search.type.TransportSearchHelper.buildScrollId;
|
||||
|
||||
@Deprecated // remove in 3.0
|
||||
public class TransportSearchScanAction extends TransportSearchTypeAction {
|
||||
|
||||
@Inject
|
||||
|
|
|
@ -263,6 +263,7 @@ public class SearchService extends AbstractLifecycleComponent<SearchService> {
|
|||
}
|
||||
}
|
||||
|
||||
@Deprecated // remove in 3.0
|
||||
public QuerySearchResult executeScan(ShardSearchRequest request) {
|
||||
final SearchContext context = createAndPutContext(request);
|
||||
final int originalSize = context.size();
|
||||
|
|
|
@ -418,6 +418,7 @@ public class SearchServiceTransportAction extends AbstractComponent {
|
|||
}
|
||||
}
|
||||
|
||||
@Deprecated // remove in 3.0
|
||||
class SearchScanTransportHandler implements TransportRequestHandler<ShardSearchTransportRequest> {
|
||||
@Override
|
||||
public void messageReceived(ShardSearchTransportRequest request, TransportChannel channel) throws Exception {
|
||||
|
|
|
@ -6,7 +6,7 @@ The delete-by-query plugin adds support for deleting all of the documents
|
|||
replacement for the problematic _delete-by-query_ functionality which has been
|
||||
removed from Elasticsearch core.
|
||||
|
||||
Internally, it uses the {ref}/search-request-scroll.html#scroll-scan[Scan/Scroll]
|
||||
Internally, it uses {ref}/search-request-scroll.html[Scroll]
|
||||
and {ref}/docs-bulk.html[Bulk] APIs to delete documents in an efficient and
|
||||
safe manner. It is slower than the old _delete-by-query_ functionality, but
|
||||
fixes the problems with the previous implementation.
|
||||
|
@ -101,7 +101,7 @@ See {ref}/search-uri-request.html[URI search request] for details.
|
|||
|
||||
`size`::
|
||||
|
||||
The number of hits returned *per shard* by the {ref}/search-request-scroll.html#scroll-scan[scan]
|
||||
The number of hits returned by the {ref}/search-request-scroll.html[scroll]
|
||||
request. Defaults to 10. May also be specified in the request body.
|
||||
|
||||
`timeout`::
|
||||
|
@ -148,7 +148,7 @@ The JSON response looks like this:
|
|||
--------------------------------------------------
|
||||
|
||||
Internally, the query is used to execute an initial
|
||||
{ref}/search-request-scroll.html#scroll-scan[scroll/scan] request. As hits are
|
||||
{ref}/search-request-scroll.html[scroll] request. As hits are
|
||||
pulled from the scroll API, they are passed to the {ref}/docs-bulk.html[Bulk
|
||||
API] for deletion.
|
||||
|
||||
|
@ -157,7 +157,7 @@ was visible to search at the time the request was executed. Any documents
|
|||
that have been reindexed or updated during execution will not be deleted.
|
||||
|
||||
Since documents can be updated or deleted by external operations during the
|
||||
_scan-scroll-bulk_ process, the plugin keeps track of different counters for
|
||||
_scroll-bulk_ process, the plugin keeps track of different counters for
|
||||
each index, with the totals displayed under the `_all` index. The counters
|
||||
are as follows:
|
||||
|
||||
|
@ -212,7 +212,7 @@ Resiliency::
|
|||
=== New delete-by-query implementation
|
||||
|
||||
The new implementation, provided by this plugin, is built internally
|
||||
using {ref}/search-request-scroll.html#scroll-scan[scan and scroll] to return
|
||||
using {ref}/search-request-scroll.html[scroll] to return
|
||||
the document IDs and versions of all the documents that need to be deleted.
|
||||
It then uses the {ref}/docs-bulk.html[`bulk` API] to do the actual deletion.
|
||||
|
||||
|
@ -231,8 +231,8 @@ try-once::
|
|||
|
||||
syntactic sugar::
|
||||
|
||||
A delete-by-query is equivalent to a scan/scroll search and corresponding
|
||||
bulk-deletes by ID.
|
||||
A delete-by-query is equivalent to a scroll search ordered by `_doc` and
|
||||
corresponding bulk-deletes by ID.
|
||||
|
||||
point-in-time::
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@ As a general rule:
|
|||
|
||||
See <<setup-upgrade>> for more info.
|
||||
--
|
||||
include::migrate_2_1.asciidoc[]
|
||||
|
||||
include::migrate_2_0.asciidoc[]
|
||||
|
||||
|
|
|
@ -27,8 +27,8 @@ The old delete-by-query functionality was fast but unsafe. It could lead to
|
|||
document differences between the primary and replica shards, and could even
|
||||
produce out of memory exceptions and cause the cluster to crash.
|
||||
|
||||
This feature has been reimplemented using the <<scroll-scan,scroll/scan>> and
|
||||
the <<docs-bulk,`bulk`>> API, which may be slower for queries which match
|
||||
This feature has been reimplemented using the <<search-request-scroll,scroll>> and
|
||||
<<docs-bulk,`bulk`>> APIs, which may be slower for queries which match
|
||||
large numbers of documents, but is safe.
|
||||
|
||||
Currently, a long running delete-by-query job cannot be cancelled, which is
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
[[breaking-changes-2.1]]
|
||||
== Breaking changes in 2.1
|
||||
|
||||
This section discusses the changes that you need to be aware of when migrating
|
||||
your application to Elasticsearch 2.1.
|
||||
|
||||
=== Search changes
|
||||
|
||||
==== `search_type=scan` deprecated
|
||||
|
||||
The `scan` search type has been deprecated. All benefits from this search
|
||||
type can now be achieved by doing a scroll request that sorts documents in
|
||||
`_doc` order, for instance:
|
||||
|
||||
[source,sh]
|
||||
---------------
|
||||
GET /my_index/_search?scroll=2m
|
||||
{
|
||||
"sort": [
|
||||
"_doc"
|
||||
]
|
||||
}
|
||||
---------------
|
||||
|
||||
Scroll requests sorted by `_doc` have been optimized to more efficiently resume
|
||||
from where the previous request stopped, so this will have the same performance
|
||||
characteristics as the former `scan` search type.
|
|
@ -64,7 +64,7 @@ And here is a sample response:
|
|||
`search_type`::
|
||||
|
||||
The type of the search operation to perform. Can be
|
||||
`dfs_query_then_fetch`, `query_then_fetch`, or 'scan'.
|
||||
`dfs_query_then_fetch` or `query_then_fetch`.
|
||||
Defaults to `query_then_fetch`.
|
||||
See <<search-request-search-type,_Search Type_>> for more.
|
||||
|
||||
|
|
|
@ -90,59 +90,20 @@ used.
|
|||
NOTE: If the request specifies aggregations, only the initial search response
|
||||
will contain the aggregations results.
|
||||
|
||||
[[scroll-scan]]
|
||||
==== Efficient scrolling with Scroll-Scan
|
||||
|
||||
Deep pagination with <<search-request-from-size,`from` and `size`>> -- e.g.
|
||||
`?size=10&from=10000` -- is very inefficient as (in this example) 100,000
|
||||
sorted results have to be retrieved from each shard and resorted in order to
|
||||
return just 10 results. This process has to be repeated for every page
|
||||
requested.
|
||||
|
||||
The `scroll` API keeps track of which results have already been returned and
|
||||
so is able to return sorted results more efficiently than with deep
|
||||
pagination. However, sorting results (which happens by default) still has a
|
||||
cost.
|
||||
|
||||
Normally, you just want to retrieve all results and the order doesn't matter.
|
||||
Scrolling can be combined with the <<scan,`scan`>> search type to disable
|
||||
any scoring or sorting and to return results in the most efficient way
|
||||
possible. All that is needed is to add `search_type=scan` to the query string
|
||||
of the initial search request:
|
||||
NOTE: Scroll requests have optimizations that make them faster when the sort
|
||||
order is `_doc`. If you want to iterate over all documents regardless of the
|
||||
order, this is the most efficient option:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl 'localhost:9200/twitter/tweet/_search?scroll=1m&search_type=scan' <1> -d '
|
||||
curl -XGET 'localhost:9200/_search?scroll=1m' -d '
|
||||
{
|
||||
"query": {
|
||||
"match" : {
|
||||
"title" : "elasticsearch"
|
||||
}
|
||||
}
|
||||
"sort": [
|
||||
"_doc"
|
||||
]
|
||||
}
|
||||
'
|
||||
--------------------------------------------------
|
||||
<1> Setting `search_type` to `scan` disables sorting and makes scrolling
|
||||
very efficient.
|
||||
|
||||
A scanning scroll request differs from a standard scroll request in four
|
||||
ways:
|
||||
|
||||
* No score is calculated and sorting is disabled. Results are returned in
|
||||
the order they appear in the index.
|
||||
|
||||
* Aggregations are not supported.
|
||||
|
||||
* The response of the initial `search` request will not contain any results in
|
||||
the `hits` array. The first results will be returned by the first `scroll`
|
||||
request.
|
||||
|
||||
* The <<search-request-from-size,`size` parameter>> controls the number of
|
||||
results *per shard*, not per request, so a `size` of `10` which hits 5
|
||||
shards will return a maximum of 50 results per `scroll` request.
|
||||
|
||||
If you want the scoring to happen, even without sorting on it, set the
|
||||
`track_scores` parameter to `true`.
|
||||
|
||||
[[scroll-search-context]]
|
||||
==== Keeping the search context alive
|
||||
|
|
|
@ -26,8 +26,8 @@ each shard using these global frequencies.
|
|||
Also, because of the need to sort the results, getting back a large
|
||||
document set, or even scrolling it, while maintaining the correct sorting
|
||||
behavior can be a very expensive operation. For large result set
|
||||
scrolling without sorting, the `scan` search type (explained below) is
|
||||
also available.
|
||||
scrolling, it is best to sort by `_doc` if the order in which documents
|
||||
are returned is not important.
|
||||
|
||||
Elasticsearch is very flexible and allows you to control the type of search
|
||||
to execute on a *per search request* basis. The type can be configured
|
||||
|
@ -77,9 +77,11 @@ API as it provides more options.
|
|||
[[scan]]
|
||||
==== Scan
|
||||
|
||||
deprecated[2.1.0, `scan` does not provide any benefits over a regular `scroll` request sorted by `_doc`]
|
||||
|
||||
Parameter value: *scan*.
|
||||
|
||||
The `scan` search type disables sorting in order to allow very efficient
|
||||
scrolling through large result sets. See <<scroll-scan>> for more.
|
||||
scrolling through large result sets.
|
||||
|
||||
|
||||
|
|
|
@ -103,7 +103,9 @@ Defaults to no terminate_after.
|
|||
|`size` |The number of hits to return. Defaults to `10`.
|
||||
|
||||
|`search_type` |The type of the search operation to perform. Can be
|
||||
`dfs_query_then_fetch`, `query_then_fetch`, `scan` or `count`
|
||||
`dfs_query_then_fetch`, `query_then_fetch`, `scan`
|
||||
deprecated[2.1.0,Replaced by a regular `scroll` sorted by `_doc`]
|
||||
or `count`
|
||||
deprecated[2.0.0-beta1,Replaced by `size: 0`]. Defaults to `query_then_fetch`. See
|
||||
<<search-request-search-type,_Search Type_>> for
|
||||
more details on the different types of search that can be performed.
|
||||
|
|
|
@ -102,12 +102,17 @@ public class TransportDeleteByQueryAction extends HandledTransportAction<DeleteB
|
|||
void executeScan() {
|
||||
try {
|
||||
final SearchRequest scanRequest = new SearchRequest(request.indices()).types(request.types()).indicesOptions(request.indicesOptions());
|
||||
scanRequest.searchType(SearchType.SCAN).scroll(request.scroll());
|
||||
scanRequest.scroll(request.scroll());
|
||||
if (request.routing() != null) {
|
||||
scanRequest.routing(request.routing());
|
||||
}
|
||||
|
||||
SearchSourceBuilder source = new SearchSourceBuilder().query(request.source()).fields("_routing", "_parent").fetchSource(false).version(true);
|
||||
SearchSourceBuilder source = new SearchSourceBuilder()
|
||||
.query(request.source())
|
||||
.fields("_routing", "_parent")
|
||||
.sort("_doc") // important for performance
|
||||
.fetchSource(false)
|
||||
.version(true);
|
||||
if (request.size() > 0) {
|
||||
source.size(request.size());
|
||||
}
|
||||
|
@ -121,17 +126,9 @@ public class TransportDeleteByQueryAction extends HandledTransportAction<DeleteB
|
|||
@Override
|
||||
public void onResponse(SearchResponse searchResponse) {
|
||||
long hits = searchResponse.getHits().getTotalHits();
|
||||
logger.trace("scan request executed: found [{}] document(s) to delete", hits);
|
||||
addShardFailures(searchResponse.getShardFailures());
|
||||
|
||||
if (hits == 0) {
|
||||
finishHim(searchResponse.getScrollId(), false, null);
|
||||
return;
|
||||
}
|
||||
logger.trace("first request executed: found [{}] document(s) to delete", hits);
|
||||
total.set(hits);
|
||||
|
||||
logger.trace("start scrolling [{}] document(s)", hits);
|
||||
executeScroll(searchResponse.getScrollId());
|
||||
deleteHits(null, searchResponse);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -151,53 +148,7 @@ public class TransportDeleteByQueryAction extends HandledTransportAction<DeleteB
|
|||
scrollAction.execute(new SearchScrollRequest(scrollId).scroll(request.scroll()), new ActionListener<SearchResponse>() {
|
||||
@Override
|
||||
public void onResponse(SearchResponse scrollResponse) {
|
||||
final SearchHit[] docs = scrollResponse.getHits().getHits();
|
||||
final String nextScrollId = scrollResponse.getScrollId();
|
||||
addShardFailures(scrollResponse.getShardFailures());
|
||||
|
||||
if (logger.isTraceEnabled()) {
|
||||
logger.trace("scroll request [{}] executed: [{}] document(s) returned", scrollId, docs.length);
|
||||
}
|
||||
|
||||
if ((docs.length == 0) || (nextScrollId == null)) {
|
||||
logger.trace("scrolling documents terminated");
|
||||
finishHim(scrollId, false, null);
|
||||
return;
|
||||
}
|
||||
|
||||
if (hasTimedOut()) {
|
||||
logger.trace("scrolling documents timed out");
|
||||
finishHim(scrollId, true, null);
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete the scrolled documents using the Bulk API
|
||||
BulkRequest bulkRequest = new BulkRequest();
|
||||
for (SearchHit doc : docs) {
|
||||
DeleteRequest delete = new DeleteRequest(doc.index(), doc.type(), doc.id()).version(doc.version());
|
||||
SearchHitField routing = doc.field("_routing");
|
||||
if (routing != null) {
|
||||
delete.routing((String) routing.value());
|
||||
}
|
||||
SearchHitField parent = doc.field("_parent");
|
||||
if (parent != null) {
|
||||
delete.parent((String) parent.value());
|
||||
}
|
||||
bulkRequest.add(delete);
|
||||
}
|
||||
|
||||
logger.trace("executing bulk request with [{}] deletions", bulkRequest.numberOfActions());
|
||||
client.bulk(bulkRequest, new ActionListener<BulkResponse>() {
|
||||
@Override
|
||||
public void onResponse(BulkResponse bulkResponse) {
|
||||
onBulkResponse(nextScrollId, bulkResponse);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFailure(Throwable e) {
|
||||
onBulkFailure(nextScrollId, docs, e);
|
||||
}
|
||||
});
|
||||
deleteHits(scrollId, scrollResponse);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -212,6 +163,56 @@ public class TransportDeleteByQueryAction extends HandledTransportAction<DeleteB
|
|||
}
|
||||
}
|
||||
|
||||
void deleteHits(String scrollId, SearchResponse scrollResponse) {
|
||||
final SearchHit[] docs = scrollResponse.getHits().getHits();
|
||||
final String nextScrollId = scrollResponse.getScrollId();
|
||||
addShardFailures(scrollResponse.getShardFailures());
|
||||
|
||||
if (logger.isTraceEnabled()) {
|
||||
logger.trace("scroll request [{}] executed: [{}] document(s) returned", scrollId, docs.length);
|
||||
}
|
||||
|
||||
if ((docs.length == 0) || (nextScrollId == null)) {
|
||||
logger.trace("scrolling documents terminated");
|
||||
finishHim(scrollId, false, null);
|
||||
return;
|
||||
}
|
||||
|
||||
if (hasTimedOut()) {
|
||||
logger.trace("scrolling documents timed out");
|
||||
finishHim(scrollId, true, null);
|
||||
return;
|
||||
}
|
||||
|
||||
// Delete the scrolled documents using the Bulk API
|
||||
BulkRequest bulkRequest = new BulkRequest();
|
||||
for (SearchHit doc : docs) {
|
||||
DeleteRequest delete = new DeleteRequest(doc.index(), doc.type(), doc.id()).version(doc.version());
|
||||
SearchHitField routing = doc.field("_routing");
|
||||
if (routing != null) {
|
||||
delete.routing((String) routing.value());
|
||||
}
|
||||
SearchHitField parent = doc.field("_parent");
|
||||
if (parent != null) {
|
||||
delete.parent((String) parent.value());
|
||||
}
|
||||
bulkRequest.add(delete);
|
||||
}
|
||||
|
||||
logger.trace("executing bulk request with [{}] deletions", bulkRequest.numberOfActions());
|
||||
client.bulk(bulkRequest, new ActionListener<BulkResponse>() {
|
||||
@Override
|
||||
public void onResponse(BulkResponse bulkResponse) {
|
||||
onBulkResponse(nextScrollId, bulkResponse);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFailure(Throwable e) {
|
||||
onBulkFailure(nextScrollId, docs, e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void onBulkResponse(String scrollId, BulkResponse bulkResponse) {
|
||||
try {
|
||||
for (BulkItemResponse item : bulkResponse.getItems()) {
|
||||
|
|
Loading…
Reference in New Issue