[Docs] Adding aggregation sections to high level client docs (#25707)

This adds a section about how to add aggregations to the SearchSourceBuilder and how
to retrieve them from a SearchResponse to the documentation for the high level rest client.
This commit is contained in:
Christoph Büscher 2017-07-14 12:47:47 +02:00 committed by GitHub
parent 4f0dc5bf32
commit f809a12493
2 changed files with 166 additions and 19 deletions

View File

@ -40,6 +40,14 @@ import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.Scroll;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.bucket.range.Range;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.avg.Avg;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.ScoreSortBuilder;
import org.elasticsearch.search.sort.SortOrder;
@ -85,7 +93,7 @@ public class SearchDocumentationIT extends ESRestHighLevelClientTestCase {
request.add(new IndexRequest("posts", "doc", "3")
.source(XContentType.JSON, "title", "The Future of Federated Search in Elasticsearch", "user",
Arrays.asList("kimchy", "tanguy"), "innerObject", Collections.singletonMap("key", "value")));
request.setRefreshPolicy(WriteRequest.RefreshPolicy.WAIT_UNTIL);
request.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
BulkResponse bulkResponse = client.bulk(request);
assertSame(bulkResponse.status(), RestStatus.OK);
assertFalse(bulkResponse.hasFailures());
@ -99,8 +107,8 @@ public class SearchDocumentationIT extends ESRestHighLevelClientTestCase {
}
{
// tag::search-request-indices-types
SearchRequest searchRequest = new SearchRequest("posts");
searchRequest.types("doc");
SearchRequest searchRequest = new SearchRequest("posts"); // <1>
searchRequest.types("doc"); // <2>
// end::search-request-indices-types
// tag::search-request-routing
searchRequest.routing("routing"); // <1>
@ -199,6 +207,81 @@ public class SearchDocumentationIT extends ESRestHighLevelClientTestCase {
}
}
/**
 * Documentation test for requesting aggregations through the
 * {@code SearchSourceBuilder} and reading them back from the {@code SearchResponse}.
 *
 * <p>Three documents with the same {@code company} value and ages 20, 30 and 40 are
 * indexed, then a {@code terms} aggregation on {@code company.keyword} with an
 * {@code avg} sub-aggregation on {@code age} is executed. The regions between the
 * {@code tag::}/{@code end::} markers are included verbatim in the reference
 * documentation, so any supporting assertions are kept outside of those regions.
 *
 * @throws IOException if a request issued through the client fails
 */
@SuppressWarnings({ "unused", "unchecked" })
public void testSearchRequestAggregations() throws IOException {
    RestHighLevelClient client = highLevelClient();
    {
        // Index the fixture documents; IMMEDIATE refresh makes them visible to the search below.
        BulkRequest request = new BulkRequest();
        request.add(new IndexRequest("posts", "doc", "1")
                .source(XContentType.JSON, "company", "Elastic", "age", 20));
        request.add(new IndexRequest("posts", "doc", "2")
                .source(XContentType.JSON, "company", "Elastic", "age", 30));
        request.add(new IndexRequest("posts", "doc", "3")
                .source(XContentType.JSON, "company", "Elastic", "age", 40));
        request.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
        BulkResponse bulkResponse = client.bulk(request);
        assertSame(bulkResponse.status(), RestStatus.OK);
        assertFalse(bulkResponse.hasFailures());
    }
    {
        SearchRequest searchRequest = new SearchRequest();
        // tag::search-request-aggregations
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        TermsAggregationBuilder aggregation = AggregationBuilders.terms("by_company")
                .field("company.keyword");
        aggregation.subAggregation(AggregationBuilders.avg("average_age")
                .field("age"));
        searchSourceBuilder.aggregation(aggregation);
        // end::search-request-aggregations
        searchSourceBuilder.query(QueryBuilders.matchAllQuery());
        searchRequest.source(searchSourceBuilder);
        SearchResponse searchResponse = client.search(searchRequest);
        {
            // tag::search-request-aggregations-get
            Aggregations aggregations = searchResponse.getAggregations();
            Terms byCompanyAggregation = aggregations.get("by_company"); // <1>
            Bucket elasticBucket = byCompanyAggregation.getBucketByKey("Elastic"); // <2>
            Avg averageAge = elasticBucket.getAggregations().get("average_age"); // <3>
            double avg = averageAge.getValue();
            // end::search-request-aggregations-get

            try {
                // tag::search-request-aggregations-get-wrongCast
                Range range = aggregations.get("by_company"); // <1>
                // end::search-request-aggregations-get-wrongCast
                // Without this the test would pass silently if the wrong cast ever stopped throwing.
                fail("expected a ClassCastException when reading the by_company terms aggregation as a Range");
            } catch (ClassCastException ex) {
                // The exact wording of the message is JVM specific (newer JVMs prefix the type
                // names with "class " and append module details), so only verify that both the
                // actual and the requested type are mentioned instead of matching the full text.
                String message = ex.getMessage();
                assertTrue(message, message.contains("org.elasticsearch.search.aggregations.bucket.terms.ParsedStringTerms"));
                assertTrue(message, message.contains("org.elasticsearch.search.aggregations.bucket.range.Range"));
            }
            // All three fixture documents share the "Elastic" key; (20 + 30 + 40) / 3 == 30.
            assertEquals(3, elasticBucket.getDocCount());
            assertEquals(30, avg, 0.0);
        }
        Aggregations aggregations = searchResponse.getAggregations();
        {
            // tag::search-request-aggregations-asMap
            Map<String, Aggregation> aggregationMap = aggregations.getAsMap();
            Terms companyAggregation = (Terms) aggregationMap.get("by_company");
            // end::search-request-aggregations-asMap
        }
        {
            // tag::search-request-aggregations-asList
            List<Aggregation> aggregationList = aggregations.asList();
            // end::search-request-aggregations-asList
        }
        {
            // tag::search-request-aggregations-iterator
            for (Aggregation agg : aggregations) {
                String type = agg.getType();
                if (type.equals(TermsAggregationBuilder.NAME)) {
                    Bucket elasticBucket = ((Terms) agg).getBucketByKey("Elastic");
                    long numberOfDocs = elasticBucket.getDocCount();
                }
            }
            // end::search-request-aggregations-iterator
        }
    }
}
public void testScroll() throws IOException {
RestHighLevelClient client = highLevelClient();
{
@ -209,7 +292,7 @@ public class SearchDocumentationIT extends ESRestHighLevelClientTestCase {
.source(XContentType.JSON, "title", "Current status and upcoming changes in Elasticsearch"));
request.add(new IndexRequest("posts", "doc", "3")
.source(XContentType.JSON, "title", "The Future of Federated Search in Elasticsearch"));
request.setRefreshPolicy(WriteRequest.RefreshPolicy.WAIT_UNTIL);
request.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
BulkResponse bulkResponse = client.bulk(request);
assertSame(bulkResponse.status(), RestStatus.OK);
assertFalse(bulkResponse.hasFailures());

View File

@ -8,7 +8,7 @@ The `SearchRequest` is used for any operation that has to do with searching
documents, aggregations, suggestions and also offers ways of requesting
highlighting on the resulting documents.
In its most basic form, a query can be added to the request like this:
In its most basic form, we can add a query to the request:
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
@ -16,21 +16,19 @@ include-tagged::{doc-tests}/SearchDocumentationIT.java[search-request-basic]
--------------------------------------------------
<1> Creates the `SearchRequest`. Without arguments this runs against all indices.
<2> Most parameters of the search can be added to the `SearchSourceBuilder`
which contains everything that
in the Rest API would be placed in the search request body.
<2> Most search parameters are added to the `SearchSourceBuilder`. It offers setters for everything that goes into the search request body.
<3> Add a `match_all` query to the `SearchSourceBuilder`.
==== Optional arguments
===== Optional arguments
Lets first look at some of the optional argument of a `SearchRequest`.
First of all, the request can be restricted to one or more indices using the
constructor or to on or more types using a setter:
Let's first look at some of the optional arguments of a `SearchRequest`:
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests}/SearchDocumentationIT.java[search-request-indices-types]
--------------------------------------------------
<1> Restricts the request to an index
<2> Limits the request to a type
There are a couple of other interesting optional parameters:
@ -52,9 +50,9 @@ how wildcard expressions are expanded
include-tagged::{doc-tests}/SearchDocumentationIT.java[search-request-preference]
--------------------------------------------------
<1> Use the preference parameter e.g. to execute the search to prefer local
shards. The The default is to randomize across shards.
shards. The default is to randomize across shards.
==== Using the SearchSourceBuilder
===== Using the SearchSourceBuilder
Most options controlling the search behavior can be set on the
`SearchSourceBuilder`,
@ -84,12 +82,25 @@ After this, the `SearchSourceBuilder` only needs to be added to the
include-tagged::{doc-tests}/SearchDocumentationIT.java[search-source-setter]
--------------------------------------------------
===== Requesting Aggregations
Aggregations can be added to the search by first creating the appropriate
`AggregationBuilder` and then setting it on the `SearchSourceBuilder`. In the
following example we create a `terms` aggregation on company names with a
sub-aggregation on the average age of employees in the company:
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests}/SearchDocumentationIT.java[search-request-aggregations]
--------------------------------------------------
We will later see how to <<java-rest-high-retrieve-aggs,access aggregations>> in the `SearchResponse`.
[[java-rest-high-document-search-sync]]
==== Synchronous Execution
When executing a `SearchRequest` in the following manner, the client waits
for the `SearchResponse` to be returned before continuing with code execution:
for the `SearchResponse` to be returned before continuing with code execution:
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
@ -117,7 +128,7 @@ The `SearchResponse` that is returned by executing the search provides details
about the search execution itself as well as access to the documents returned.
First, there is useful information about the request execution itself, like the
HTTP status code, execution time or whether the request terminated early or timed
out:
out:
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
@ -135,6 +146,9 @@ failures can also be handled by iterating over an array off
include-tagged::{doc-tests}/SearchDocumentationIT.java[search-response-2]
--------------------------------------------------
[[java-rest-high-retrieve-searchHits]]
===== Retrieving SearchHits
To get access to the returned documents, we need to first get the `SearchHits`
contained in the response:
@ -152,7 +166,7 @@ include-tagged::{doc-tests}/SearchDocumentationIT.java[search-hits-info]
--------------------------------------------------
Nested inside the `SearchHits` are the individual search results that can
be iterated over like this:
be iterated over:
["source","java",subs="attributes,callouts,macros"]
@ -169,12 +183,62 @@ include-tagged::{doc-tests}/SearchDocumentationIT.java[search-hits-singleHit-pro
--------------------------------------------------
Furthermore, it lets you get back the document source, either as a simple
JSON-String or as a map of key/value pairs. In this map, regular fields
JSON-String or as a map of key/value pairs. In this map, regular fields
are keyed by the field name and contain the field value. Multi-valued fields are
returned as lists of objects, nested objects as another key/value map. These
cases need to be case accordingly:
cases need to be cast accordingly:
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests}/SearchDocumentationIT.java[search-hits-singleHit-source]
--------------------------------------------------
[[java-rest-high-retrieve-aggs]]
===== Retrieving Aggregations
Aggregations can be retrieved from the `SearchResponse` by first getting the
root of the aggregation tree, the `Aggregations` object, and then getting the
aggregation by name.
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests}/SearchDocumentationIT.java[search-request-aggregations-get]
--------------------------------------------------
<1> Get the `by_company` terms aggregation
<2> Get the bucket that is keyed with `Elastic`
<3> Get the `average_age` sub-aggregation from that bucket
Note that if you access aggregations by name, you need to specify the
aggregation interface according to the type of aggregation you requested,
otherwise a `ClassCastException` will be thrown:
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests}/SearchDocumentationIT.java[search-request-aggregations-get-wrongCast]
--------------------------------------------------
<1> This will throw an exception because "by_company" is a `terms` aggregation
but we try to retrieve it as a `range` aggregation
It is also possible to access all aggregations as a map that is keyed by the
aggregation name. In this case, the cast to the proper aggregation interface
needs to happen explicitly:
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests}/SearchDocumentationIT.java[search-request-aggregations-asMap]
--------------------------------------------------
There are also getters that return all top level aggregations as a list:
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests}/SearchDocumentationIT.java[search-request-aggregations-asList]
--------------------------------------------------
And last but not least you can iterate over all aggregations and then e.g.
decide how to further process them based on their type:
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests}/SearchDocumentationIT.java[search-request-aggregations-iterator]
--------------------------------------------------