diff --git a/docs/reference/search/aggregations.asciidoc b/docs/reference/search/aggregations.asciidoc index 409dfc38f12..2c34ec20baf 100644 --- a/docs/reference/search/aggregations.asciidoc +++ b/docs/reference/search/aggregations.asciidoc @@ -1,31 +1,53 @@ [[search-aggregations]] == Aggregations -Aggregations grew out of the <> module and the long expirience of how users use it (and would like to use it) for real-time data analytics purposes. As such, it serves as the next generation replacement for the functionality we currently refer to as "faceting". +Aggregations grew out of the <> module and the long experience of how users use it +(and would like to use it) for real-time data analytics purposes. As such, it serves as the next generation +replacement for the functionality we currently refer to as "faceting". -<> provide a great way to aggregate data within a document set context. This context is defined by the executed query in combination with the different levels of filters that can be defined (filtered queries, top level filters, and facet level filters). While powerful, their implementation is not designed from ground up to support complex aggregations and thus limited. +<> provide a great way to aggregate data within a document set context. +This context is defined by the executed query in combination with the different levels of filters that can be defined +(filtered queries, top level filters, and facet level filters). While powerful, their implementation is not designed +from the ground up to support complex aggregations and is thus limited. .Are facets deprecated? ********************************** -As the functionality facets offer is a subset of the the one offered by aggregations, over time, we would like to see users move to aggregations for all realtime data analytics. That said, we are well aware that such transitions/migrations take time, and for this reason we are keeping the facets around for the time being. 
Nonetheless, facets are and should be considered deprecated and will likely be removed in one of the future major releases. +As the functionality facets offer is a subset of the one offered by aggregations, over time, we would like to +see users move to aggregations for all realtime data analytics. That said, we are well aware that such +transitions/migrations take time, and for this reason we are keeping the facets around for the time being. +Nonetheless, facets are and should be considered deprecated and will likely be removed in one of the future major +releases. ********************************** -The aggregations module breaks the barriers the current facet implementation put in place. The new name ("Aggregations") also indicate the intention here - a generic yet extremely powerful framework for building aggregations - any types of aggregations. +The aggregations module breaks the barriers the current facet implementation put in place. The new name ("Aggregations") +also indicates the intention here - a generic yet extremely powerful framework for building aggregations - any type of +aggregations. -An aggregation can be seen as a _unit-of-work_ that builds analytic information over a set of documents. The context of the execution defines what this document set is (e.g. a top level aggregation executes within the context of the executed query/filters of the search request). +An aggregation can be seen as a _unit-of-work_ that builds analytic information over a set of documents. The context of +the execution defines what this document set is (e.g. a top level aggregation executes within the context of the executed +query/filters of the search request). -There are many different types of aggregations, each with its own purpose and output. To better understand these types, it is often easier to break them into two main families: +There are many different types of aggregations, each with its own purpose and output. 
To better understand these types, +it is often easier to break them into two main families: _Bucketing_:: - A family of aggregations that build buckets, where each bucket is associated with a _key_ and a document criteria. When the aggregations is executed, the buckets criterias are evaluated on every document in the context and when matches, the document is considered to "fall in" the relevant bucket. By the end of the aggreagation process, we'll end up with a list of buckets - each one with a set of documents that "belong" to it. + A family of aggregations that build buckets, where each bucket is associated with a _key_ and a document + criterion. When the aggregation is executed, the bucket criteria are evaluated on every document in + the context and when one matches, the document is considered to "fall in" the relevant bucket. By the end of + the aggregation process, we'll end up with a list of buckets - each one with a set of documents that + "belong" to it. _Metric_:: Aggregations that keep track and compute metrics over a set of documents -The interesting part comes next, since each bucket effectively defines a document set (all documents belonging to the bucket), one can potentially associated aggregations on the bucket level, and those will execute within the context of that bucket. This is where the real power of aggregations kicks in: *aggregations can be nested!* +The interesting part comes next, since each bucket effectively defines a document set (all documents belonging to +the bucket), one can potentially associate aggregations on the bucket level, and those will execute within the context +of that bucket. This is where the real power of aggregations kicks in: *aggregations can be nested!* NOTE: Bucketing aggregations can have sub-aggregations (bucketing or metric). The sub aggregations will be computed for - each of the buckets their parent aggregation generates. 
There is not hard limit on the level/depth of nested aggregations (one can nest an aggregation under a "parent" aggregation which is itself a sub-aggregation of another highter aggregations) + each of the buckets their parent aggregation generates. There is no hard limit on the level/depth of nested + aggregations (one can nest an aggregation under a "parent" aggregation which is itself a sub-aggregation of + another higher aggregation) [float] === Structuring Aggregations @@ -45,21 +67,43 @@ The following snippet captures the basic structure of aggregations: } -------------------------------------------------- -The `aggregations` object (a.k.a `aggs` for short) in the json holds the aggregations to be computed. Each aggregation is associated with a logical name that the user defines (e.g. if the aggregation computes the average price, then it'll make sense to name it `avg_price`). These logical names will also be used to uniquely identify the aggregations in the response. Each aggregation has a specific type (`` in the above snippet) and is typically the first key within the named aggregation body. Each type of aggregation define its own body, depending on the nature of the aggregation (eg. an `avg` aggregation on a specific field will define the field on which the avg will be calculated). At the same level of the aggregation type definition, one can optionally define a set of additional aggregations, though this only makes sense if the aggregation you defined is of a bucketing nature. In this scenario, the sub-aggregations you define on the bucketing aggregation level will be computed for all the buckets built by the bucketing aggregation. For example, if the you define a set of aggregations under the `range` aggregation, the sub-aggregations will be computed for each of the range buckets that are defined. +The `aggregations` object (a.k.a `aggs` for short) in the json holds the aggregations to be computed. 
Each aggregation +is associated with a logical name that the user defines (e.g. if the aggregation computes the average price, then it'll +make sense to name it `avg_price`). These logical names will also be used to uniquely identify the aggregations in the +response. Each aggregation has a specific type (`` in the above snippet) and is typically the first +key within the named aggregation body. Each type of aggregation defines its own body, depending on the nature of the +aggregation (e.g. an `avg` aggregation on a specific field will define the field on which the avg will be calculated). +At the same level of the aggregation type definition, one can optionally define a set of additional aggregations, +though this only makes sense if the aggregation you defined is of a bucketing nature. In this scenario, the +sub-aggregations you define on the bucketing aggregation level will be computed for all the buckets built by the +bucketing aggregation. For example, if you define a set of aggregations under the `range` aggregation, the +sub-aggregations will be computed for each of the range buckets that are defined. [float] ==== Values Source -Some aggregations work on values extracted from the aggregated documents. Typically, the values will be extracted from a sepcific document field which is set under the `field` settings for the aggrations. It is also possible to define a `<>` that will generate the values (per document). +Some aggregations work on values extracted from the aggregated documents. Typically, the values will be extracted from +a specific document field which is set under the `field` settings for the aggregations. It is also possible to define a +`<>` that will generate the values (per document). -When both `field` and `script` settings are configured for the aggregation, the script will be treated as a `value script`. While normal scripts are evaluated on a document level (i.e. 
the script has access to all the data associated with the document), value scripts are evaluated on the *value* level. In this mode, the values are extracted from the configured `field` and the `script` is used to apply a "transformation" over these value/s +When both `field` and `script` settings are configured for the aggregation, the script will be treated as a +`value script`. While normal scripts are evaluated on a document level (i.e. the script has access to all the data +associated with the document), value scripts are evaluated on the *value* level. In this mode, the values are extracted +from the configured `field` and the `script` is used to apply a "transformation" over these value/s -["NOTE",id="metrics-script-note"] +["NOTE",id="aggs-script-note"] =============================== -When working with scripts, the `script_lang` and `params` settings can also be defined. The former defines the scripting language that is used (assuming the proper language is available in es either by default or as a plugin). The latter enables defining all the "dynamic" expressions in the script as parameters, and by that keep the script itself static between calls (this will ensure the use of the cached compiled scripts in elasticsearch). +When working with scripts, the `lang` and `params` settings can also be defined. The former defines the scripting +language that is used (assuming the proper language is available in es either by default or as a plugin). The latter +enables defining all the "dynamic" expressions in the script as parameters, and by that keep the script itself static +between calls (this will ensure the use of the cached compiled scripts in elasticsearch). =============================== -Scripts can generate a single value or multiple values per documents. When generating multiple values, once can use the `script_values_sorted` settings to indicate whether these values are sorted or not. 
Internally, elasticsearch can perform optimizations when dealing with sorted values (for example, with the `min` aggregations, knowing the values are sorted, elasticsearch will skip the iterations over all the values and rely on the first value in the list to be the minimum value among all other values associated with the same document). +Scripts can generate a single value or multiple values per document. When generating multiple values, one can use the +`script_values_sorted` setting to indicate whether these values are sorted or not. Internally, elasticsearch can +perform optimizations when dealing with sorted values (for example, with the `min` aggregation, knowing the values are +sorted, elasticsearch will skip the iterations over all the values and rely on the first value in the list to be the +minimum value among all other values associated with the same document). [float] === Metrics Aggregations diff --git a/docs/reference/search/aggregations/bucket/datehistogram-aggregation.asciidoc b/docs/reference/search/aggregations/bucket/datehistogram-aggregation.asciidoc index 537207e936d..340233a6ee6 100644 --- a/docs/reference/search/aggregations/bucket/datehistogram-aggregation.asciidoc +++ b/docs/reference/search/aggregations/bucket/datehistogram-aggregation.asciidoc @@ -1,7 +1,13 @@ [[search-aggregations-bucket-datehistogram-aggregation]] === Date Histogram -A multi-bucket aggregation similar to the <> except it can only be applied on date values. Since dates are represented in elasticsearch internally as long values, it is possible to use the normal `histogram` on dates as well, though accuracy will be compromized. The reason for this is in the fact that time based intervals are not fixed (think of leap years and on the number of days in a month). For this reason, we need a spcial support for time based data. From functionality perspective, this historam supports the same features as the normal <>. 
The main difference though is that the interval can be specified by date/time expressions. +A multi-bucket aggregation similar to the <> except it can +only be applied on date values. Since dates are represented in elasticsearch internally as long values, it is possible +to use the normal `histogram` on dates as well, though accuracy will be compromised. The reason for this is in the fact +that time based intervals are not fixed (think of leap years and of the number of days in a month). For this reason, +we need special support for time based data. From a functionality perspective, this histogram supports the same features +as the normal <>. The main difference though is that the +interval can be specified by date/time expressions. Requesting a month length bucket intervals @@ -39,29 +45,49 @@ Other available expressions for interval: `year`, `quarter`, `week`, `day`, `hou ==== Time Zone -By default, times are stored as UTC milliseconds since the epoch. Thus, all computation and "bucketing" / "rounding" is done on UTC. It is possible to provide a time zone (both pre rounding, and post rounding) value, which will cause all computations to take the relevant zone into account. The time returned for each bucket/entry is milliseconds since the epoch of the provided time zone. +By default, times are stored as UTC milliseconds since the epoch. Thus, all computation and "bucketing" / "rounding" is +done on UTC. It is possible to provide a time zone (both pre rounding, and post rounding) value, which will cause all +computations to take the relevant zone into account. The time returned for each bucket/entry is milliseconds since the +epoch of the provided time zone. -The parameters are `pre_zone` (pre rounding based on interval) and `post_zone` (post rounding based on interval). The `time_zone` parameter simply sets the `pre_zone` parameter. By default, those are set to `UTC`. 
+The parameters are `pre_zone` (pre rounding based on interval) and `post_zone` (post rounding based on interval). The +`time_zone` parameter simply sets the `pre_zone` parameter. By default, those are set to `UTC`. -The zone value accepts either a numeric value for the hours offset, for example: `"time_zone" : -2`. It also accepts a format of hours and minutes, like `"time_zone" : "-02:30"`. Another option is to provide a time zone accepted as one of the values listed here. +The zone value accepts either a numeric value for the hours offset, for example: `"time_zone" : -2`. It also accepts a +format of hours and minutes, like `"time_zone" : "-02:30"`. Another option is to provide a time zone accepted as one of +the values listed here. -Lets take an example. For `2012-04-01T04:15:30Z`, with a `pre_zone` of `-08:00`. For day interval, the actual time by applying the time zone and rounding falls under `2012-03-31`, so the returned value will be (in millis) of `2012-03-31T00:00:00Z` (UTC). For hour interval, applying the time zone results in `2012-03-31T20:15:30`, rounding it results in `2012-03-31T20:00:00`, but, we want to return it in UTC (`post_zone` is not set), so we convert it back to UTC: `2012-04-01T04:00:00Z`. Note, we are consistent in the results, returning the rounded value in UTC. +Let's take an example. For `2012-04-01T04:15:30Z`, with a `pre_zone` of `-08:00`. For day interval, the actual time by +applying the time zone and rounding falls under `2012-03-31`, so the returned value will be (in millis) of +`2012-03-31T00:00:00Z` (UTC). For hour interval, applying the time zone results in `2012-03-31T20:15:30`, rounding it +results in `2012-03-31T20:00:00`, but, we want to return it in UTC (`post_zone` is not set), so we convert it back to +UTC: `2012-04-01T04:00:00Z`. Note, we are consistent in the results, returning the rounded value in UTC. `post_zone` simply takes the result, and adds the relevant offset. 
-Sometimes, we want to apply the same conversion to UTC we did above for hour also for day (and up) intervals. We can set `pre_zone_adjust_large_interval` to `true`, which will apply the same conversion done for hour interval in the example, to day and above intervals (it can be set regardless of the interval, but only kick in when using day and higher intervals). +Sometimes, we want to apply the same conversion to UTC we did above for hour also for day (and up) intervals. We can +set `pre_zone_adjust_large_interval` to `true`, which will apply the same conversion done for hour interval in the +example, to day and above intervals (it can be set regardless of the interval, but only kicks in when using day and +higher intervals). ==== Factor -The date histogram works on numeric values (since time is stored in milliseconds since the epoch in UTC). But, sometimes, systems will store a different resolution (like seconds since UTC) in a numeric field. The `factor` parameter can be used to change the value in the field to milliseconds to actual do the relevant rounding, and then be applied again to get to the original unit. For example, when storing in a numeric field seconds resolution, the factor can be set to 1000. +The date histogram works on numeric values (since time is stored in milliseconds since the epoch in UTC). But, +sometimes, systems will store a different resolution (like seconds since UTC) in a numeric field. The `factor` +parameter can be used to change the value in the field to milliseconds to actually do the relevant rounding, and then +be applied again to get to the original unit. For example, when storing in a numeric field seconds resolution, the +factor can be set to 1000. ==== Pre/Post Offset -Specific offsets can be provided for pre rounding and post rounding. The `pre_offset` for pre rounding, and `post_offset` for post rounding. The format is the date time format (`1h`, `1d`, etc...). 
+Specific offsets can be provided for pre rounding and post rounding. The `pre_offset` for pre rounding, and +`post_offset` for post rounding. The format is the date time format (`1h`, `1d`, etc...). ==== Keys -Since internally, dates are represented as 64bit numbers, these numbers are returned as the bucket keys (each key representing a date - milliseconds since the epoch). It is also possible to define a date format, which will result in returning the dates as formatted strings next to the numeric key values: +Since internally, dates are represented as 64bit numbers, these numbers are returned as the bucket keys (each key +representing a date - milliseconds since the epoch). It is also possible to define a date format, which will result in +returning the dates as formatted strings next to the numeric key values: [source,js] -------------------------------------------------- @@ -103,4 +129,6 @@ Response: } -------------------------------------------------- -Like with the normal <>, both document level scripts and value level scripts are supported. It is also possilbe to control the order of the returned buckets using the `order` settings and empty buckets can also be returned by setting the `empty_buckets` field to `true` (defaults to `false`). \ No newline at end of file +Like with the normal <>, both document level scripts and +value level scripts are supported. It is also possible to control the order of the returned buckets using the `order` +settings and empty buckets can also be returned by setting the `empty_buckets` field to `true` (defaults to `false`). 
\ No newline at end of file diff --git a/src/main/java/org/elasticsearch/search/aggregations/ValuesSourceAggregationBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/ValuesSourceAggregationBuilder.java index 6bea5b0636c..606b36ae1bf 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/ValuesSourceAggregationBuilder.java +++ b/src/main/java/org/elasticsearch/search/aggregations/ValuesSourceAggregationBuilder.java @@ -13,7 +13,7 @@ public abstract class ValuesSourceAggregationBuilder params; /** @@ -58,12 +58,12 @@ public abstract class ValuesSourceAggregationBuilder * Also see {@link #script(String)}. * - * @param scriptLang The language of the script. - * @return This builder (fluent interface support) + * @param lang The language of the script. + * @return This builder (fluent interface support) */ @SuppressWarnings("unchecked") - public B scriptLang(String scriptLang) { - this.scriptLang = scriptLang; + public B lang(String lang) { + this.lang = lang; return (B) this; } @@ -107,8 +107,8 @@ public abstract class ValuesSourceAggregationBuilder> extends ValuesSourceAggregationBuilder { +public abstract class AbstractRangeBuilder> extends ValuesSourceAggregationBuilder { protected static class Range implements ToXContent { @@ -44,7 +44,7 @@ public abstract class RangeBuilderBase> extends Va protected List ranges = Lists.newArrayList(); - protected RangeBuilderBase(String name, String type) { + protected AbstractRangeBuilder(String name, String type) { super(name, type); } diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeBuilder.java index f192b4dec99..8a572771e02 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeBuilder.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeBuilder.java @@ -3,7 +3,7 @@ package org.elasticsearch.search.aggregations.bucket.range; 
/** * */ -public class RangeBuilder extends RangeBuilderBase { +public class RangeBuilder extends AbstractRangeBuilder { public RangeBuilder(String name) { super(name, InternalRange.TYPE.name()); diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeParser.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeParser.java index 9d567c4d378..b68b58b3146 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeParser.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeParser.java @@ -68,7 +68,7 @@ public class RangeParser implements Aggregator.Parser { field = parser.text(); } else if ("script".equals(currentFieldName)) { script = parser.text(); - } else if ("script_lang".equals(currentFieldName) || "scriptLang".equals(currentFieldName)) { + } else if ("lang".equals(currentFieldName)) { scriptLang = parser.text(); } } else if (token == XContentParser.Token.START_ARRAY) { diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/range/date/DateRangeBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/range/date/DateRangeBuilder.java index 06bef46dfc0..1396fba9cd1 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/range/date/DateRangeBuilder.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/range/date/DateRangeBuilder.java @@ -1,14 +1,14 @@ package org.elasticsearch.search.aggregations.bucket.range.date; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.search.aggregations.bucket.range.RangeBuilderBase; +import org.elasticsearch.search.aggregations.bucket.range.AbstractRangeBuilder; import java.io.IOException; /** * */ -public class DateRangeBuilder extends RangeBuilderBase { +public class DateRangeBuilder extends AbstractRangeBuilder { private String format; diff --git 
a/src/main/java/org/elasticsearch/search/aggregations/bucket/range/date/DateRangeParser.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/range/date/DateRangeParser.java index 906cf338443..7d2ffc526cf 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/range/date/DateRangeParser.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/range/date/DateRangeParser.java @@ -74,7 +74,7 @@ public class DateRangeParser implements Aggregator.Parser { field = parser.text(); } else if ("script".equals(currentFieldName)) { script = parser.text(); - } else if ("script_lang".equals(currentFieldName) || "scriptLang".equals(currentFieldName)) { + } else if ("lang".equals(currentFieldName)) { scriptLang = parser.text(); } else if ("format".equals(currentFieldName)) { format = parser.text(); diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/range/ipv4/IPv4RangeBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/range/ipv4/IPv4RangeBuilder.java index 084a16b7437..b42df3c1a93 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/range/ipv4/IPv4RangeBuilder.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/range/ipv4/IPv4RangeBuilder.java @@ -1,6 +1,6 @@ package org.elasticsearch.search.aggregations.bucket.range.ipv4; -import org.elasticsearch.search.aggregations.bucket.range.RangeBuilderBase; +import org.elasticsearch.search.aggregations.bucket.range.AbstractRangeBuilder; import org.elasticsearch.search.builder.SearchSourceBuilderException; import java.util.regex.Pattern; @@ -8,7 +8,7 @@ import java.util.regex.Pattern; /** * */ -public class IPv4RangeBuilder extends RangeBuilderBase { +public class IPv4RangeBuilder extends AbstractRangeBuilder { public static final long MAX_IP = 4294967296l; private static final Pattern MASK_PATTERN = Pattern.compile("[\\.|/]"); diff --git 
a/src/main/java/org/elasticsearch/search/aggregations/bucket/range/ipv4/IpRangeParser.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/range/ipv4/IpRangeParser.java index 9bd802d249a..7c5958920c5 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/range/ipv4/IpRangeParser.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/range/ipv4/IpRangeParser.java @@ -73,7 +73,7 @@ public class IpRangeParser implements Aggregator.Parser { field = parser.text(); } else if ("script".equals(currentFieldName)) { script = parser.text(); - } else if ("script_lang".equals(currentFieldName) || "scriptLang".equals(currentFieldName)) { + } else if ("lang".equals(currentFieldName)) { scriptLang = parser.text(); } } else if (token == XContentParser.Token.START_ARRAY) { diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java index fd5e5e7271d..4ea1085aed6 100644 --- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java +++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsParser.java @@ -84,7 +84,7 @@ public class TermsParser implements Aggregator.Parser { field = parser.text(); } else if ("script".equals(currentFieldName)) { script = parser.text(); - } else if ("script_lang".equals(currentFieldName) || "scriptLang".equals(currentFieldName)) { + } else if ("lang".equals(currentFieldName)) { scriptLang = parser.text(); } else if ("value_type".equals(currentFieldName) || "valueType".equals(currentFieldName)) { valueType = Terms.ValueType.resolveType(parser.text()); diff --git a/src/main/java/org/elasticsearch/search/aggregations/metrics/ValuesSourceMetricsAggregationBuilder.java b/src/main/java/org/elasticsearch/search/aggregations/metrics/ValuesSourceMetricsAggregationBuilder.java index 0ef53fde1de..7d320b9d5fe 100644 --- 
a/src/main/java/org/elasticsearch/search/aggregations/metrics/ValuesSourceMetricsAggregationBuilder.java +++ b/src/main/java/org/elasticsearch/search/aggregations/metrics/ValuesSourceMetricsAggregationBuilder.java @@ -13,7 +13,7 @@ public abstract class ValuesSourceMetricsAggregationBuilder params; protected ValuesSourceMetricsAggregationBuilder(String name, String type) { @@ -33,8 +33,8 @@ public abstract class ValuesSourceMetricsAggregationBuilder