From bd28c9c44e779dc784bd59387682b35d441bd627 Mon Sep 17 00:00:00 2001 From: Colin Goodheart-Smithe Date: Fri, 17 Apr 2015 11:34:01 +0100 Subject: [PATCH] Documentation for the max_bucket reducer --- .../reducer/max-bucket-aggregation.asciidoc | 148 +++--------------- 1 file changed, 19 insertions(+), 129 deletions(-) diff --git a/docs/reference/search/aggregations/reducer/max-bucket-aggregation.asciidoc b/docs/reference/search/aggregations/reducer/max-bucket-aggregation.asciidoc index 659f3ff1930..ca6f274d189 100644 --- a/docs/reference/search/aggregations/reducer/max-bucket-aggregation.asciidoc +++ b/docs/reference/search/aggregations/reducer/max-bucket-aggregation.asciidoc @@ -1,16 +1,17 @@ [[search-aggregations-reducer-max-bucket-aggregation]] === Max Bucket Aggregation -A parent reducer aggregation which calculates the derivative of a specified metric in a parent histogram (or date_histogram) -aggregation. The specified metric must be numeric and the enclosing histogram must have `min_doc_count` set to `0`. +A sibling reducer aggregation which identifies the bucket(s) with the maximum value of a specified metric in a sibing aggregation +and outputs both the value and the key(s) of the bucket(s). The specified metric must be numeric and the sibling aggregation must +be a multi-bucket aggregation. -The following snippet calculates the derivative of the total monthly `sales`: +The following snippet calculates the maximum of the total monthly `sales`: [source,js] -------------------------------------------------- { "aggs" : { - "sales" : { + "sales_per_month" : { "date_histogram" : { "field" : "date", "interval" : "month" @@ -20,19 +21,20 @@ The following snippet calculates the derivative of the total monthly `sales`: "sum": { "field": "price" } - }, - "sales_deriv": { - "derivative": { - "buckets_paths": "sales" <1> - } } } + }, + "max_monthly_sales": { + "max_bucket": { + "buckets_paths": "sales_per_month>sales" <1> + } } } } -------------------------------------------------- -<1> `bucket_paths` instructs this derivative aggregation to use the output of the `sales` aggregation for the derivative +<1> `bucket_paths` instructs this max_bucket aggregation that we want the maximum value of the `sales` aggregation in the +"sales_per_month` date histogram. And the following may be the response: @@ -40,7 +42,7 @@ And the following may be the response: -------------------------------------------------- { "aggregations": { - "sales": { + "sales_per_month": { "buckets": [ { "key_as_string": "2015/01/01 00:00:00", @@ -48,7 +50,7 @@ And the following may be the response: "doc_count": 3, "sales": { "value": 550 - } <1> + } }, { "key_as_string": "2015/02/01 00:00:00", @@ -56,9 +58,6 @@ And the following may be the response: "doc_count": 2, "sales": { "value": 60 - }, - "sales_deriv": { - "value": -490 <2> } }, { @@ -67,126 +66,17 @@ And the following may be the response: "doc_count": 2, "sales": { "value": 375 - }, - "sales_deriv": { - "value": 315 } } ] + }, + "max_monthly_sales": { + "keys": ["2015/01/01 00:00:00"], <1> + "value": 550 } } } -------------------------------------------------- -<1> No derivative for the first bucket since we need at least 2 data points to calculate the derivative -<2> Derivative value units are implicitly defined by the `sales` aggregation and the parent histogram so in this case the units -would be $/month assuming the `price` field has units of $. - -==== Second Order Derivative - -A second order derivative can be calculated by chaining the derivative reducer aggregation onto the result of another derivative -reducer aggregation as in the following example which will calculate both the first and the second order derivative of the total -monthly sales: - -[source,js] --------------------------------------------------- -{ - "aggs" : { - "sales" : { - "date_histogram" : { - "field" : "date", - "interval" : "month" - }, - "aggs": { - "sales": { - "sum": { - "field": "price" - } - }, - "sales_deriv": { - "derivative": { - "buckets_paths": "sales" - } - }, - "sales_2nd_deriv": { - "derivative": { - "buckets_paths": "sales_deriv" <1> - } - } - } - } - } -} --------------------------------------------------- - -<1> `bucket_paths` for the second derivative points to the name of the first derivative - -And the following may be the response: - -[source,js] --------------------------------------------------- -{ - "aggregations": { - "sales": { - "buckets": [ - { - "key_as_string": "2015/01/01 00:00:00", - "key": 1420070400000, - "doc_count": 3, - "sales": { - "value": 550 - } <1> - }, - { - "key_as_string": "2015/02/01 00:00:00", - "key": 1422748800000, - "doc_count": 2, - "sales": { - "value": 60 - }, - "sales_deriv": { - "value": -490 - } <1> - }, - { - "key_as_string": "2015/03/01 00:00:00", - "key": 1425168000000, - "doc_count": 2, - "sales": { - "value": 375 - }, - "sales_deriv": { - "value": 315 - }, - "sales_2nd_deriv": { - "value": 805 - } - } - ] - } - } -} --------------------------------------------------- -<1> No second derivative for the first two buckets since we need at least 2 data points from the first derivative to calculate the -second derivative - -==== Dealing with gaps in the data - -There are a couple of reasons why the data output by the enclosing histogram may have gaps: - -* There are no documents matching the query for some buckets -* The data for a metric is missing in all of the documents falling into a bucket (this is most likely with either a small interval -on the enclosing histogram or with a query matching only a small number of documents) - -Where there is no data available in a bucket for a given metric it presents a problem for calculating the derivative value for both -the current bucket and the next bucket. In the derivative reducer aggregation has a `gap policy` parameter to define what the behavior -should be when a gap in the data is found. There are currently two options for controlling the gap policy: - -_ignore_:: - This option will not produce a derivative value for any buckets where the value in the current or previous bucket is - missing - -_insert_zeros_:: - This option will assume the missing value is `0` and calculate the derivative with the value `0`. - +<1> `keys` is an array of strings since the maximum value may be present in multiple buckets