From 75c553f466ac2046ab2c13546f7875f5ef4b2083 Mon Sep 17 00:00:00 2001 From: Alice Williams <88908598+alicejw-aws@users.noreply.github.com> Date: Wed, 6 Jul 2022 14:30:39 -0700 Subject: [PATCH] Multi-terms aggregations docs (#720) * for issue https://github.com/opensearch-project/documentation-website/issues/582 Signed-off-by: alicejw * fix extra blank lines Signed-off-by: alicejw * adding clarification Signed-off-by: alicejw * for updating per editorial feedback Signed-off-by: alicejw * doc review feedback Signed-off-by: alicejw * for doc review comments Signed-off-by: alicejw --- _opensearch/bucket-agg.md | 120 +++++++++++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 2 deletions(-) diff --git a/_opensearch/bucket-agg.md b/_opensearch/bucket-agg.md index 43b2f97b..1f73fb4b 100644 --- a/_opensearch/bucket-agg.md +++ b/_opensearch/bucket-agg.md @@ -12,7 +12,7 @@ Bucket aggregations categorize sets of documents as buckets. The type of bucket You can use bucket aggregations to implement faceted navigation (usually placed as a sidebar on a search result landing page) to help your users narrow down the results. -## terms +## Terms The `terms` aggregation dynamically creates a bucket for each unique term of a field. @@ -74,6 +74,121 @@ The `terms` aggregation requests each shard for its top 3 unique terms. The coor This is especially true if `size` is set to a low number. Because the default size is 10, an error is unlikely to happen. If you don’t need high accuracy and want to increase the performance, you can reduce the size. +## Multi-terms + +Similar to the `terms` bucket aggregation, you can also search for multiple terms using the `multi_terms` aggregation. Multi-terms aggregations are useful when you need to sort by document count, or when you need to sort by a metric aggregation on a composite key and get the top `n` results. 
For example, you could search a specific number of documents (e.g., 1000) for the number of servers per location that show CPU usage greater than 90%. The top `n` results would be returned for this multi-terms query. + +The `multi_terms` aggregation consumes more memory than a `terms` aggregation, so its performance might be slower. +{: .tip } + +### Multi-terms aggregation parameters + +Parameter | Description +:--- | :--- +multi_terms | Indicates a multi-terms aggregation that gathers buckets of documents together based on criteria specified by multiple terms. +size | Specifies the number of buckets to return. Default is 10. +order | Indicates the order to sort the buckets. By default, buckets are ordered according to document count per bucket. If the buckets contain the same document count, then `order` can be explicitly set to the term value or a metric aggregation instead of document count (e.g., set `order` to "max-cpu"). +doc_count | Indicates the number of documents in each returned bucket. By default, the top 10 buckets are returned. 
+ +#### Sample Request + +```json +GET sample-index100/_search +{ + "size": 0, + "aggs": { + "hot": { + "multi_terms": { + "terms": [{ + "field": "region" + },{ + "field": "host" + }], + "order": {"max-cpu": "desc"} + }, + "aggs": { + "max-cpu": { "max": { "field": "cpu" } } + } + } + } +} +``` + +#### Sample Response + +```json +{ + "took": 118, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 8, + "relation": "eq" + }, + "max_score": null, + "hits": [] + }, + "aggregations": { + "hot": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": [ + "dub", + "h1" + ], + "key_as_string": "dub|h1", + "doc_count": 2, + "max-cpu": { + "value": 90.0 + } + }, + { + "key": [ + "dub", + "h2" + ], + "key_as_string": "dub|h2", + "doc_count": 2, + "max-cpu": { + "value": 70.0 + } + }, + { + "key": [ + "iad", + "h2" + ], + "key_as_string": "iad|h2", + "doc_count": 2, + "max-cpu": { + "value": 50.0 + } + }, + { + "key": [ + "iad", + "h1" + ], + "key_as_string": "iad|h1", + "doc_count": 2, + "max-cpu": { + "value": 15.0 + } + } + ] + } + } +} +``` + ## sampler, diversified_sampler If you're aggregating over millions of documents, you can use a `sampler` aggregation to reduce its scope to a small sample of documents for a faster response. The `sampler` aggregation selects the samples by top-scoring documents. @@ -552,7 +667,6 @@ The `range` aggregation lets you define the range for each bucket. For example, you can find the number of bytes between 1000 and 2000, 2000 and 3000, and 3000 and 4000. Within the `range` parameter, you can define ranges as objects of an array. 
- ```json GET opensearch_dashboards_sample_data_logs/_search { @@ -709,6 +823,7 @@ GET opensearch_dashboards_sample_data_logs/_search } } ``` + If you add a document with malformed fields to an index that has `ip_range` set to `false` in its mappings, OpenSearch rejects the entire document. You can set `ignore_malformed` to `true` to specify that OpenSearch should ignore malformed fields. The default is `false`. ```json @@ -722,6 +837,7 @@ If you add a document with malformed fields to an index that has `ip_range` set } } ``` + ## filter, filters A `filter` aggregation is a query clause, exactly like a search query — `match` or `term` or `range`. You can use the `filter` aggregation to narrow down the entire set of documents to a specific set before creating buckets.