From 787acb14b91b7065c4ed80167fb4bdb6cc444c86 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 25 Jan 2019 13:45:39 +0100 Subject: [PATCH] Track total hits up to 10,000 by default (#37466) This commit changes the default for the `track_total_hits` option of the search request to `10,000`. This means that by default search requests will accurately track the total hit count up to `10,000` documents; requests that match more than this value will set the `"total.relation"` to `"gte"` (i.e. greater than or equal to) and the `"total.value"` to `10,000` in the search response. Scroll queries are not impacted; they will continue to count the total hits accurately. The default is set back to `true` (accurate hit count) if `rest_total_hits_as_int` is set in the search request. I chose `10,000` as the default because that's also the number we use to limit pagination. This means that users will be able to know how far they can jump (up to 10,000) even if the total number of hits is not accurate. 
Closes #33028 --- docs/reference/getting-started.asciidoc | 6 +- .../index-modules/index-sorting.asciidoc | 3 +- .../migration/migrate_7_0/search.asciidoc | 31 +++++ .../query-dsl/rank-feature-query.asciidoc | 2 +- .../search/request/track-total-hits.asciidoc | 119 +++++++++--------- docs/reference/search/uri-request.asciidoc | 2 +- .../search/AbstractSearchAsyncAction.java | 8 +- .../action/search/SearchPhaseController.java | 11 +- .../action/search/SearchRequest.java | 6 +- .../common/io/stream/StreamInput.java | 10 ++ .../common/io/stream/StreamOutput.java | 12 ++ .../rest/action/search/RestCountAction.java | 2 +- .../rest/action/search/RestSearchAction.java | 27 ++-- .../elasticsearch/search/SearchService.java | 8 +- .../search/builder/SearchSourceBuilder.java | 22 ++-- .../search/internal/SearchContext.java | 2 +- .../query/EarlyTerminatingCollector.java | 10 ++ .../search/query/TopDocsCollectorContext.java | 9 +- .../search/profile/query/QueryProfilerIT.java | 2 + .../search/RandomSearchRequestGenerator.java | 2 +- .../BatchedDocumentsIteratorTests.java | 3 +- .../authc/esnative/NativeUsersStore.java | 2 + .../authz/store/NativeRolesStore.java | 3 + .../xpack/sql/execution/search/Querier.java | 9 +- 24 files changed, 215 insertions(+), 96 deletions(-) diff --git a/docs/reference/getting-started.asciidoc b/docs/reference/getting-started.asciidoc index d32eeaff8c7..d8656f7ac4c 100755 --- a/docs/reference/getting-started.asciidoc +++ b/docs/reference/getting-started.asciidoc @@ -793,7 +793,11 @@ As for the response, we see the following parts: * `hits._score` and `max_score` - ignore these fields for now The accuracy of `hits.total` is controlled by the request parameter `track_total_hits`, when set to true -the request will track the total hits accurately (`"relation": "eq"`). +the request will track the total hits accurately (`"relation": "eq"`). It defaults to `10,000` +which means that the total hit count is accurately tracked up to `10,000` documents. 
+You can force an accurate count by setting `track_total_hits` to true explicitly. +See the <> documentation +for more details. Here is the same exact search above using the alternative request body method: diff --git a/docs/reference/index-modules/index-sorting.asciidoc b/docs/reference/index-modules/index-sorting.asciidoc index b4648dd256d..a387deaca6d 100644 --- a/docs/reference/index-modules/index-sorting.asciidoc +++ b/docs/reference/index-modules/index-sorting.asciidoc @@ -195,7 +195,8 @@ as soon as N documents have been collected per segment. <1> The total number of hits matching the query is unknown because of early termination. -NOTE: Aggregations will collect all documents that match the query regardless of the value of `track_total_hits` +NOTE: Aggregations will collect all documents that match the query regardless +of the value of `track_total_hits` [[index-modules-index-sorting-conjunctions]] === Use index sorting to speed up conjunctions diff --git a/docs/reference/migration/migrate_7_0/search.asciidoc b/docs/reference/migration/migrate_7_0/search.asciidoc index 61cbee85130..67adf936340 100644 --- a/docs/reference/migration/migrate_7_0/search.asciidoc +++ b/docs/reference/migration/migrate_7_0/search.asciidoc @@ -205,3 +205,34 @@ If `track_total_hits` is set to `false` in the search request the search respons will set `hits.total` to null and the object will not be displayed in the rest layer. You can add `rest_total_hits_as_int=true` in the search request parameters to get the old format back (`"total": -1`). + +[float] +==== `track_total_hits` defaults to 10,000 + +By default search request will count the total hits accurately up to `10,000` +documents. If the total number of hits that match the query is greater than this + value, the response will indicate that the returned value is a lower bound: + +[source,js] +-------------------------------------------------- +{ + "_shards": ... 
+ "timed_out": false, + "took": 100, + "hits": { + "max_score": 1.0, + "total" : { + "value": 10000, <1> + "relation": "gte" <2> + }, + "hits": ... + } +} +-------------------------------------------------- +// NOTCONSOLE + +<1> There are at least 10000 documents that match the query +<2> This is a lower bound (`"gte"`). + +You can force the count to always be accurate by setting `"track_total_hits` +to true explicitly in the search request. \ No newline at end of file diff --git a/docs/reference/query-dsl/rank-feature-query.asciidoc b/docs/reference/query-dsl/rank-feature-query.asciidoc index 277d45f257d..fe23c5f3ec2 100644 --- a/docs/reference/query-dsl/rank-feature-query.asciidoc +++ b/docs/reference/query-dsl/rank-feature-query.asciidoc @@ -11,7 +11,7 @@ of the query. Compared to using <> or other ways to modify the score, this query has the benefit of being able to efficiently skip non-competitive hits when -<> is set to `false`. Speedups may be +<> is not set to `true`. Speedups may be spectacular. Here is an example that indexes various features: diff --git a/docs/reference/search/request/track-total-hits.asciidoc b/docs/reference/search/request/track-total-hits.asciidoc index bdad4dbde91..c416c777366 100644 --- a/docs/reference/search/request/track-total-hits.asciidoc +++ b/docs/reference/search/request/track-total-hits.asciidoc @@ -4,9 +4,20 @@ Generally the total hit count can't be computed accurately without visiting all matches, which is costly for queries that match lots of documents. The `track_total_hits` parameter allows you to control how the total number of hits -should be tracked. When set to `true` the search response will always track the -number of hits that match the query accurately (e.g. `total.relation` will always -be equal to `"eq"` when `track_total_hits is set to true). +should be tracked. 
+Given that it is often enough to have a lower bound of the number of hits, +such as "there are at least 10000 hits", the default is set to `10,000`. +This means that requests will count the total hits accurately up to `10,000` hits. +It is a good trade off to speed up searches if you don't need the accurate number +of hits after a certain threshold. + +When set to `true` the search response will always track the number of hits that +match the query accurately (e.g. `total.relation` will always be equal to `"eq"` +when `track_total_hits` is set to `true`). Otherwise the `"total.relation"` returned +in the `"total"` object in the search response determines how the `"total.value"` +should be interpreted. A value of `"gte"` means that the `"total.value"` is a +lower bound of the total hits that match the query and a value of `"eq"` indicates +that `"total.value"` is the accurate count. [source,js] -------------------------------------------------- @@ -50,57 +61,9 @@ GET twitter/_search <1> The total number of hits that match the query. <2> The count is accurate (e.g. `"eq"` means equals). -If you don't need to track the total number of hits you can improve query times -by setting this option to `false`. In such case the search can efficiently skip -non-competitive hits because it doesn't need to count all matches: - -[source,js] --------------------------------------------------- -GET twitter/_search -{ - "track_total_hits": false, - "query": { - "match" : { - "message" : "Elasticsearch" - } - } -} --------------------------------------------------- -// CONSOLE -// TEST[continued] - -\... returns: - -[source,js] --------------------------------------------------- -{ - "_shards": ... - "timed_out": false, - "took": 10, - "hits" : { <1> - "max_score": 1.0, - "hits": ... 
- } -} --------------------------------------------------- -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/] -// TESTRESPONSE[s/"took": 10/"took": $body.took/] -// TESTRESPONSE[s/"max_score": 1\.0/"max_score": $body.hits.max_score/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": "$body.hits.hits"/] - -<1> The total number of hits is unknown. - -Given that it is often enough to have a lower bound of the number of hits, -such as "there are at least 1000 hits", it is also possible to set -`track_total_hits` as an integer that represents the number of hits to count -accurately. The search can efficiently skip non-competitive document as soon -as collecting at least $`track_total_hits` documents. This is a good trade -off to speed up searches if you don't need the accurate number of hits after -a certain threshold. - - -For instance the following query will track the total hit count that match -the query accurately up to 100 documents: +It is also possible to set `track_total_hits` to an integer. +For instance the following query will accurately track the total hit count that match +the query up to 100 documents: [source,js] -------------------------------------------------- @@ -118,8 +81,8 @@ GET twitter/_search // TEST[continued] The `hits.total.relation` in the response will indicate if the -value returned in `hits.total.value` is accurate (`eq`) or a lower -bound of the total (`gte`). +value returned in `hits.total.value` is accurate (`"eq"`) or a lower +bound of the total (`"gte"`). For instance the following response: @@ -173,4 +136,46 @@ will indicate that the returned value is a lower bound: // TEST[skip:response is already tested in the previous snippet] <1> There are at least 100 documents that match the query -<2> This is a lower bound (`gte`). \ No newline at end of file +<2> This is a lower bound (`"gte"`). 
+ +If you don't need to track the total number of hits at all you can improve query +times by setting this option to `false`: + +[source,js] +-------------------------------------------------- +GET twitter/_search +{ + "track_total_hits": false, + "query": { + "match" : { + "message" : "Elasticsearch" + } + } +} +-------------------------------------------------- +// CONSOLE +// TEST[continued] + +\... returns: + +[source,js] +-------------------------------------------------- +{ + "_shards": ... + "timed_out": false, + "took": 10, + "hits" : { <1> + "max_score": 1.0, + "hits": ... + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/] +// TESTRESPONSE[s/"took": 10/"took": $body.took/] +// TESTRESPONSE[s/"max_score": 1\.0/"max_score": $body.hits.max_score/] +// TESTRESPONSE[s/"hits": \.\.\./"hits": "$body.hits.hits"/] + +<1> The total number of hits is unknown. + +Finally you can force an accurate count by setting `"track_total_hits"` +to `true` in the request. \ No newline at end of file diff --git a/docs/reference/search/uri-request.asciidoc b/docs/reference/search/uri-request.asciidoc index 87e1da907fb..7bf769c6d7f 100644 --- a/docs/reference/search/uri-request.asciidoc +++ b/docs/reference/search/uri-request.asciidoc @@ -101,7 +101,7 @@ is important). |`track_scores` |When sorting, set to `true` in order to still track scores and return them as part of each hit. -|`track_total_hits` |Defaults to true. Set to `false` in order to disable the tracking +|`track_total_hits` |Defaults to `10,000`. Set to `false` in order to disable the tracking of the total number of hits that match the query. It also accepts an integer which in this case represents the number of hits to count accurately. 
diff --git a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java index d6abbf73e88..45bfb099f2b 100644 --- a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java @@ -114,9 +114,11 @@ abstract class AbstractSearchAsyncAction exten //no search shards to search on, bail with empty response //(it happens with search across _all with no indices around and consistent with broadcast operations) - boolean withTotalHits = request.source() != null ? - // total hits is null in the response if the tracking of total hits is disabled - request.source().trackTotalHitsUpTo() != SearchContext.TRACK_TOTAL_HITS_DISABLED : true; + int trackTotalHitsUpTo = request.source() == null ? SearchContext.DEFAULT_TRACK_TOTAL_HITS_UP_TO : + request.source().trackTotalHitsUpTo() == null ? 
SearchContext.DEFAULT_TRACK_TOTAL_HITS_UP_TO : + request.source().trackTotalHitsUpTo(); + // total hits is null in the response if the tracking of total hits is disabled + boolean withTotalHits = trackTotalHitsUpTo != SearchContext.TRACK_TOTAL_HITS_DISABLED; listener.onResponse(new SearchResponse(InternalSearchResponse.empty(withTotalHits), null, 0, 0, 0, buildTookInMillis(), ShardSearchFailure.EMPTY_ARRAY, clusters)); return; diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java index 027d9d5f10c..67f33398bba 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java @@ -696,6 +696,15 @@ public final class SearchPhaseController { int getNumReducePhases() { return numReducePhases; } } + private int resolveTrackTotalHits(SearchRequest request) { + if (request.scroll() != null) { + // no matter what the value of track_total_hits is + return SearchContext.TRACK_TOTAL_HITS_ACCURATE; + } + return request.source() == null ? SearchContext.DEFAULT_TRACK_TOTAL_HITS_UP_TO : request.source().trackTotalHitsUpTo() == null ? + SearchContext.DEFAULT_TRACK_TOTAL_HITS_UP_TO : request.source().trackTotalHitsUpTo(); + } + /** * Returns a new ArraySearchPhaseResults instance. This might return an instance that reduces search responses incrementally. */ @@ -704,7 +713,7 @@ public final class SearchPhaseController { boolean isScrollRequest = request.scroll() != null; final boolean hasAggs = source != null && source.aggregations() != null; final boolean hasTopDocs = source == null || source.size() != 0; - final int trackTotalHitsUpTo = source == null ? 
SearchContext.DEFAULT_TRACK_TOTAL_HITS_UP_TO : source.trackTotalHitsUpTo(); + final int trackTotalHitsUpTo = resolveTrackTotalHits(request); final boolean finalReduce = request.getLocalClusterAlias() == null; if (isScrollRequest == false && (hasAggs || hasTopDocs)) { diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchRequest.java b/server/src/main/java/org/elasticsearch/action/search/SearchRequest.java index 69b090fb89a..020887068f0 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchRequest.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchRequest.java @@ -32,6 +32,7 @@ import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.search.Scroll; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.tasks.Task; import org.elasticsearch.tasks.TaskId; @@ -222,7 +223,10 @@ public final class SearchRequest extends ActionRequest implements IndicesRequest public ActionRequestValidationException validate() { ActionRequestValidationException validationException = null; final Scroll scroll = scroll(); - if (source != null && source.trackTotalHits() == false && scroll != null) { + if (source != null + && source.trackTotalHitsUpTo() != null + && source.trackTotalHitsUpTo() != SearchContext.TRACK_TOTAL_HITS_ACCURATE + && scroll != null) { validationException = addValidationError("disabling [track_total_hits] is not allowed in a scroll context", validationException); } diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java b/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java index 723de8fd5da..dde71ad68e1 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java @@ -204,6 +204,16 @@ public abstract class 
StreamInput extends InputStream { | ((readByte() & 0xFF) << 8) | (readByte() & 0xFF); } + /** + * Reads an optional {@link Integer}. + */ + public Integer readOptionalInt() throws IOException { + if (readBoolean()) { + return readInt(); + } + return null; + } + /** * Reads an int stored in variable-length format. Reads between one and * five bytes. Smaller values take fewer bytes. Negative numbers diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java index e9709de1a44..3031e2f2e71 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java @@ -323,6 +323,18 @@ public abstract class StreamOutput extends OutputStream { } } + /** + * Writes an optional {@link Integer}. + */ + public void writeOptionalInt(@Nullable Integer integer) throws IOException { + if (integer == null) { + writeBoolean(false); + } else { + writeBoolean(true); + writeInt(integer); + } + } + public void writeOptionalVInt(@Nullable Integer integer) throws IOException { if (integer == null) { writeBoolean(false); diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/RestCountAction.java b/server/src/main/java/org/elasticsearch/rest/action/search/RestCountAction.java index 04d13133f08..ecdd34ca07c 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/RestCountAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/RestCountAction.java @@ -72,7 +72,7 @@ public class RestCountAction extends BaseRestHandler { public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { SearchRequest countRequest = new SearchRequest(Strings.splitStringByCommaToArray(request.param("index"))); countRequest.indicesOptions(IndicesOptions.fromRequest(request, countRequest.indicesOptions())); - 
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().size(0); + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().size(0).trackTotalHits(true); countRequest.source(searchSourceBuilder); request.withContentOrSourceParamParserOrNull(parser -> { if (parser == null) { diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/RestSearchAction.java b/server/src/main/java/org/elasticsearch/rest/action/search/RestSearchAction.java index da773efed58..78082dd3641 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/RestSearchAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/RestSearchAction.java @@ -173,6 +173,7 @@ public class RestSearchAction extends BaseRestHandler { searchRequest.routing(request.param("routing")); searchRequest.preference(request.param("preference")); searchRequest.indicesOptions(IndicesOptions.fromRequest(request, searchRequest.indicesOptions())); + checkRestTotalHits(request, searchRequest); } @@ -240,6 +241,7 @@ public class RestSearchAction extends BaseRestHandler { searchSourceBuilder.trackScores(request.paramAsBoolean("track_scores", false)); } + if (request.hasParam("track_total_hits")) { if (Booleans.isBoolean(request.param("track_total_hits"))) { searchSourceBuilder.trackTotalHits( @@ -289,17 +291,26 @@ public class RestSearchAction extends BaseRestHandler { } /** - * Throws an {@link IllegalArgumentException} if {@link #TOTAL_HITS_AS_INT_PARAM} - * is used in conjunction with a lower bound value for the track_total_hits option. + * Modify the search request to accurately count the total hits that match the query + * if {@link #TOTAL_HITS_AS_INT_PARAM} is set. + * + * @throws IllegalArgumentException if {@link #TOTAL_HITS_AS_INT_PARAM} + * is used in conjunction with a lower bound value (other than {@link SearchContext#DEFAULT_TRACK_TOTAL_HITS_UP_TO}) + * for the track_total_hits option. 
*/ public static void checkRestTotalHits(RestRequest restRequest, SearchRequest searchRequest) { - int trackTotalHitsUpTo = searchRequest.source() == null ? - SearchContext.DEFAULT_TRACK_TOTAL_HITS_UP_TO : searchRequest.source().trackTotalHitsUpTo(); - if (trackTotalHitsUpTo == SearchContext.TRACK_TOTAL_HITS_ACCURATE || - trackTotalHitsUpTo == SearchContext.TRACK_TOTAL_HITS_DISABLED) { - return ; + boolean totalHitsAsInt = restRequest.paramAsBoolean(TOTAL_HITS_AS_INT_PARAM, false); + if (totalHitsAsInt == false) { + return; } - if (restRequest.paramAsBoolean(TOTAL_HITS_AS_INT_PARAM, false)) { + if (searchRequest.source() == null) { + searchRequest.source(new SearchSourceBuilder()); + } + Integer trackTotalHitsUpTo = searchRequest.source().trackTotalHitsUpTo(); + if (trackTotalHitsUpTo == null) { + searchRequest.source().trackTotalHits(true); + } else if (trackTotalHitsUpTo != SearchContext.TRACK_TOTAL_HITS_ACCURATE + && trackTotalHitsUpTo != SearchContext.TRACK_TOTAL_HITS_DISABLED) { throw new IllegalArgumentException("[" + TOTAL_HITS_AS_INT_PARAM + "] cannot be used " + "if the tracking of total hits is not accurate, got " + trackTotalHitsUpTo); } diff --git a/server/src/main/java/org/elasticsearch/search/SearchService.java b/server/src/main/java/org/elasticsearch/search/SearchService.java index 5e2758eb5b8..ef255c8af7a 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchService.java +++ b/server/src/main/java/org/elasticsearch/search/SearchService.java @@ -775,10 +775,14 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv } } context.trackScores(source.trackScores()); - if (source.trackTotalHits() == false && context.scrollContext() != null) { + if (source.trackTotalHitsUpTo() != null + && source.trackTotalHitsUpTo() != SearchContext.TRACK_TOTAL_HITS_ACCURATE + && context.scrollContext() != null) { throw new SearchContextException(context, "disabling [track_total_hits] is not allowed in a scroll context"); } - 
context.trackTotalHitsUpTo(source.trackTotalHitsUpTo()); + if (source.trackTotalHitsUpTo() != null) { + context.trackTotalHitsUpTo(source.trackTotalHitsUpTo()); + } if (source.minScore() != null) { context.minimumScore(source.minScore()); } diff --git a/server/src/main/java/org/elasticsearch/search/builder/SearchSourceBuilder.java b/server/src/main/java/org/elasticsearch/search/builder/SearchSourceBuilder.java index 81dd84ad8e4..f5c99fc5137 100644 --- a/server/src/main/java/org/elasticsearch/search/builder/SearchSourceBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/builder/SearchSourceBuilder.java @@ -69,7 +69,6 @@ import java.util.Objects; import java.util.stream.Collectors; import static org.elasticsearch.index.query.AbstractQueryBuilder.parseInnerQueryBuilder; -import static org.elasticsearch.search.internal.SearchContext.DEFAULT_TRACK_TOTAL_HITS_UP_TO; import static org.elasticsearch.search.internal.SearchContext.TRACK_TOTAL_HITS_ACCURATE; import static org.elasticsearch.search.internal.SearchContext.TRACK_TOTAL_HITS_DISABLED; @@ -158,7 +157,7 @@ public final class SearchSourceBuilder implements Writeable, ToXContentObject, R private boolean trackScores = false; - private int trackTotalHitsUpTo = DEFAULT_TRACK_TOTAL_HITS_UP_TO; + private Integer trackTotalHitsUpTo; private SearchAfterBuilder searchAfterBuilder; @@ -261,7 +260,7 @@ public final class SearchSourceBuilder implements Writeable, ToXContentObject, R sliceBuilder = in.readOptionalWriteable(SliceBuilder::new); collapse = in.readOptionalWriteable(CollapseBuilder::new); if (in.getVersion().onOrAfter(Version.V_7_0_0)) { - trackTotalHitsUpTo = in.readInt(); + trackTotalHitsUpTo = in.readOptionalInt(); } else { trackTotalHitsUpTo = in.readBoolean() ? 
TRACK_TOTAL_HITS_ACCURATE : TRACK_TOTAL_HITS_DISABLED; } @@ -327,9 +326,9 @@ public final class SearchSourceBuilder implements Writeable, ToXContentObject, R out.writeOptionalWriteable(sliceBuilder); out.writeOptionalWriteable(collapse); if (out.getVersion().onOrAfter(Version.V_7_0_0)) { - out.writeInt(trackTotalHitsUpTo); + out.writeOptionalInt(trackTotalHitsUpTo); } else { - out.writeBoolean(trackTotalHitsUpTo > SearchContext.TRACK_TOTAL_HITS_DISABLED); + out.writeBoolean(trackTotalHitsUpTo == null ? true : trackTotalHitsUpTo > SearchContext.TRACK_TOTAL_HITS_DISABLED); } } @@ -568,16 +567,17 @@ public final class SearchSourceBuilder implements Writeable, ToXContentObject, R /** * Indicates if the total hit count for the query should be tracked. */ - public boolean trackTotalHits() { - return trackTotalHitsUpTo == TRACK_TOTAL_HITS_ACCURATE; - } - public SearchSourceBuilder trackTotalHits(boolean trackTotalHits) { this.trackTotalHitsUpTo = trackTotalHits ? TRACK_TOTAL_HITS_ACCURATE : TRACK_TOTAL_HITS_DISABLED; return this; } - public int trackTotalHitsUpTo() { + /** + * Returns the total hit count that should be tracked or null if the value is unset. + * Defaults to null. 
+ */ + @Nullable + public Integer trackTotalHitsUpTo() { return trackTotalHitsUpTo; } @@ -1289,7 +1289,7 @@ public final class SearchSourceBuilder implements Writeable, ToXContentObject, R builder.field(TRACK_SCORES_FIELD.getPreferredName(), true); } - if (trackTotalHitsUpTo != SearchContext.DEFAULT_TRACK_TOTAL_HITS_UP_TO) { + if (trackTotalHitsUpTo != null) { builder.field(TRACK_TOTAL_HITS_FIELD.getPreferredName(), trackTotalHitsUpTo); } diff --git a/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java b/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java index bd6d9c501c8..2c2aedfcf74 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java +++ b/server/src/main/java/org/elasticsearch/search/internal/SearchContext.java @@ -84,7 +84,7 @@ public abstract class SearchContext extends AbstractRefCounted implements Releas public static final int DEFAULT_TERMINATE_AFTER = 0; public static final int TRACK_TOTAL_HITS_ACCURATE = Integer.MAX_VALUE; public static final int TRACK_TOTAL_HITS_DISABLED = -1; - public static final int DEFAULT_TRACK_TOTAL_HITS_UP_TO = TRACK_TOTAL_HITS_ACCURATE; + public static final int DEFAULT_TRACK_TOTAL_HITS_UP_TO = 10000; private Map> clearables = null; private final AtomicBoolean closed = new AtomicBoolean(false); diff --git a/server/src/main/java/org/elasticsearch/search/query/EarlyTerminatingCollector.java b/server/src/main/java/org/elasticsearch/search/query/EarlyTerminatingCollector.java index 8b17437740c..2cfcdf1ae66 100644 --- a/server/src/main/java/org/elasticsearch/search/query/EarlyTerminatingCollector.java +++ b/server/src/main/java/org/elasticsearch/search/query/EarlyTerminatingCollector.java @@ -41,6 +41,7 @@ public class EarlyTerminatingCollector extends FilterCollector { private final int maxCountHits; private int numCollected; private boolean forceTermination; + private boolean earlyTerminated; /** * Ctr @@ -58,6 +59,7 @@ public class 
EarlyTerminatingCollector extends FilterCollector { @Override public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { if (numCollected >= maxCountHits) { + earlyTerminated = true; if (forceTermination) { throw new EarlyTerminationException("early termination [CountBased]"); } else { @@ -68,6 +70,7 @@ public class EarlyTerminatingCollector extends FilterCollector { @Override public void collect(int doc) throws IOException { if (++numCollected > maxCountHits) { + earlyTerminated = true; if (forceTermination) { throw new EarlyTerminationException("early termination [CountBased]"); } else { @@ -78,4 +81,11 @@ public class EarlyTerminatingCollector extends FilterCollector { }; }; } + + /** + * Returns true if this collector has early terminated. + */ + public boolean hasEarlyTerminated() { + return earlyTerminated; + } } diff --git a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java index 2314d11e7e3..1ccc8f4cb92 100644 --- a/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java +++ b/server/src/main/java/org/elasticsearch/search/query/TopDocsCollectorContext.java @@ -112,8 +112,11 @@ abstract class TopDocsCollectorContext extends QueryCollectorContext { this.collector = hitCountCollector; this.hitCountSupplier = () -> new TotalHits(hitCountCollector.getTotalHits(), TotalHits.Relation.EQUAL_TO); } else { - this.collector = new EarlyTerminatingCollector(hitCountCollector, trackTotalHitsUpTo, false); - this.hitCountSupplier = () -> new TotalHits(hitCount, TotalHits.Relation.EQUAL_TO); + EarlyTerminatingCollector col = + new EarlyTerminatingCollector(hitCountCollector, trackTotalHitsUpTo, false); + this.collector = col; + this.hitCountSupplier = () -> new TotalHits(hitCountCollector.getTotalHits(), + col.hasEarlyTerminated() ? 
TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO : TotalHits.Relation.EQUAL_TO); } } else { this.collector = new EarlyTerminatingCollector(hitCountCollector, 0, false); @@ -157,7 +160,7 @@ abstract class TopDocsCollectorContext extends QueryCollectorContext { this.sortFmt = sortAndFormats == null ? new DocValueFormat[] { DocValueFormat.RAW } : sortAndFormats.formats; this.topDocsCollector = collapseContext.createTopDocs(sort, numHits); - MaxScoreCollector maxScoreCollector = null; + MaxScoreCollector maxScoreCollector; if (trackMaxScore) { maxScoreCollector = new MaxScoreCollector(); maxScoreSupplier = maxScoreCollector::getMaxScore; diff --git a/server/src/test/java/org/elasticsearch/search/profile/query/QueryProfilerIT.java b/server/src/test/java/org/elasticsearch/search/profile/query/QueryProfilerIT.java index 14686aff209..27b11577669 100644 --- a/server/src/test/java/org/elasticsearch/search/profile/query/QueryProfilerIT.java +++ b/server/src/test/java/org/elasticsearch/search/profile/query/QueryProfilerIT.java @@ -53,6 +53,7 @@ public class QueryProfilerIT extends ESIntegTestCase { * This test simply checks to make sure nothing crashes. 
Test indexes 100-150 documents, * constructs 20-100 random queries and tries to profile them */ + @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8658") public void testProfileQuery() throws Exception { createIndex("test"); ensureGreen(); @@ -79,6 +80,7 @@ public class QueryProfilerIT extends ESIntegTestCase { SearchResponse resp = client().prepareSearch() .setQuery(q) + .setTrackTotalHits(true) .setProfile(true) .setSearchType(SearchType.QUERY_THEN_FETCH) .get(); diff --git a/test/framework/src/main/java/org/elasticsearch/search/RandomSearchRequestGenerator.java b/test/framework/src/main/java/org/elasticsearch/search/RandomSearchRequestGenerator.java index 6ec2732aaf9..58dbe869b5c 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/RandomSearchRequestGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/search/RandomSearchRequestGenerator.java @@ -165,7 +165,7 @@ public class RandomSearchRequestGenerator { builder.trackTotalHits(randomBoolean()); } else { builder.trackTotalHitsUpTo( - randomIntBetween(SearchContext.TRACK_TOTAL_HITS_DISABLED, SearchContext.DEFAULT_TRACK_TOTAL_HITS_UP_TO) + randomIntBetween(SearchContext.TRACK_TOTAL_HITS_DISABLED, SearchContext.TRACK_TOTAL_HITS_ACCURATE) ); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/persistence/BatchedDocumentsIteratorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/persistence/BatchedDocumentsIteratorTests.java index c301a0b3697..0024eb5f8c6 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/persistence/BatchedDocumentsIteratorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/persistence/BatchedDocumentsIteratorTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; +import 
org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.ml.test.SearchHitBuilder; import org.junit.Before; @@ -139,7 +140,7 @@ public class BatchedDocumentsIteratorTests extends ESTestCase { assertThat(searchRequest.scroll().keepAlive(), equalTo(TimeValue.timeValueMinutes(5))); assertThat(searchRequest.types().length, equalTo(0)); assertThat(searchRequest.source().query(), equalTo(QueryBuilders.matchAllQuery())); - assertThat(searchRequest.source().trackTotalHits(), is(true)); + assertThat(searchRequest.source().trackTotalHitsUpTo(), is(SearchContext.TRACK_TOTAL_HITS_ACCURATE)); } private void assertSearchScrollRequests(int expectedCount) { diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/esnative/NativeUsersStore.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/esnative/NativeUsersStore.java index 47c32489ae1..3a6fb0ea3b3 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/esnative/NativeUsersStore.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/esnative/NativeUsersStore.java @@ -175,6 +175,7 @@ public class NativeUsersStore { client.prepareSearch(SECURITY_INDEX_NAME) .setQuery(QueryBuilders.termQuery(Fields.TYPE.getPreferredName(), USER_DOC_TYPE)) .setSize(0) + .setTrackTotalHits(true) .request(), new ActionListener<SearchResponse>() { @Override @@ -578,6 +579,7 @@ public class NativeUsersStore { securityIndex.checkIndexVersionThenExecute(listener::onFailure, () -> executeAsyncWithOrigin(client.threadPool().getThreadContext(), SECURITY_ORIGIN, client.prepareSearch(SECURITY_INDEX_NAME) + .setTrackTotalHits(true) .setQuery(QueryBuilders.termQuery(Fields.TYPE.getPreferredName(), RESERVED_USER_TYPE)) .setFetchSource(true).request(), new ActionListener<SearchResponse>() { diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/NativeRolesStore.java
b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/NativeRolesStore.java index a36f830ceac..cbc66235d30 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/NativeRolesStore.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/NativeRolesStore.java @@ -253,6 +253,7 @@ public class NativeRolesStore implements BiConsumer<Set<String>, ActionListener< client.prepareMultiSearch() .add(client.prepareSearch(SecurityIndexManager.SECURITY_INDEX_NAME) .setQuery(QueryBuilders.termQuery(RoleDescriptor.Fields.TYPE.getPreferredName(), ROLE_TYPE)) + .setTrackTotalHits(true) .setSize(0)) .add(client.prepareSearch(SecurityIndexManager.SECURITY_INDEX_NAME) .setQuery(QueryBuilders.boolQuery() @@ -262,12 +263,14 @@ public class NativeRolesStore implements BiConsumer<Set<String>, ActionListener< .should(existsQuery("indices.field_security.except")) // for backwardscompat with 2.x .should(existsQuery("indices.fields")))) + .setTrackTotalHits(true) .setSize(0) .setTerminateAfter(1)) .add(client.prepareSearch(SecurityIndexManager.SECURITY_INDEX_NAME) .setQuery(QueryBuilders.boolQuery() .must(QueryBuilders.termQuery(RoleDescriptor.Fields.TYPE.getPreferredName(), ROLE_TYPE)) .filter(existsQuery("indices.query"))) + .setTrackTotalHits(true) .setSize(0) .setTerminateAfter(1)) .request(), diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/Querier.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/Querier.java index 16a6a4135b4..ff02ed85818 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/Querier.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/Querier.java @@ -111,8 +111,13 @@ public class Querier { } public static SearchRequest prepareRequest(Client client, SearchSourceBuilder source, TimeValue timeout, String...
indices) { - SearchRequest search = client.prepareSearch(indices).setSource(source).setTimeout(timeout).request(); - search.allowPartialSearchResults(false); + SearchRequest search = client.prepareSearch(indices) + // always track total hits accurately + .setTrackTotalHits(true) + .setAllowPartialSearchResults(false) + .setSource(source) + .setTimeout(timeout) + .request(); return search; }