From d8414ffa29f1d84fae36436e43112b249cf9de87 Mon Sep 17 00:00:00 2001 From: Zachary Tong Date: Wed, 2 Aug 2017 17:47:27 -0400 Subject: [PATCH] CONSOLEify percentile and percentile-ranks docs Related #18160 --- docs/build.gradle | 34 +++++++- .../metrics/percentile-aggregation.asciidoc | 82 ++++++++++++------- .../percentile-rank-aggregation.asciidoc | 79 +++++++++++------- 3 files changed, 136 insertions(+), 59 deletions(-) diff --git a/docs/build.gradle b/docs/build.gradle index a681f415a34..e6543e450b7 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -31,8 +31,6 @@ buildRestTests.expectedUnconvertedCandidates = [ 'reference/aggregations/bucket/significantterms-aggregation.asciidoc', 'reference/aggregations/bucket/terms-aggregation.asciidoc', 'reference/aggregations/matrix/stats-aggregation.asciidoc', - 'reference/aggregations/metrics/percentile-aggregation.asciidoc', - 'reference/aggregations/metrics/percentile-rank-aggregation.asciidoc', 'reference/aggregations/metrics/scripted-metric-aggregation.asciidoc', 'reference/aggregations/metrics/tophits-aggregation.asciidoc', 'reference/cluster/allocation-explain.asciidoc', @@ -476,3 +474,35 @@ buildRestTests.setups['analyze_sample'] = ''' properties: obj1.field1: type: text''' + +// Used by percentile/percentile-rank aggregations +buildRestTests.setups['latency'] = ''' + - do: + indices.create: + index: latency + body: + settings: + number_of_shards: 1 + number_of_replicas: 1 + mappings: + data: + properties: + load_time: + type: long + - do: + bulk: + index: latency + type: data + refresh: true + body: |''' + + +for (int i = 0; i < 100; i++) { + def value = i + if (i % 10) { + value = i*10 + } + buildRestTests.setups['latency'] += """ + {"index":{}} + {"load_time": "$value"}""" +} diff --git a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc index d07036fccf1..9db4b7b7c35 100644 --- a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc @@ -26,7 +26,9 @@ Let's look at a range of percentiles representing load time: [source,js] -------------------------------------------------- +GET latency/data/_search { + "size": 0, "aggs" : { "load_time_outlier" : { "percentiles" : { @@ -36,6 +38,8 @@ Let's look at a range of percentiles representing load time: } } -------------------------------------------------- +// CONSOLE +// TEST[setup:latency] <1> The field `load_time` must be a numeric field By default, the `percentile` metric will generate a range of @@ -49,18 +53,19 @@ percentiles: `[ 1, 5, 25, 50, 75, 95, 99 ]`. The response will look like this: "aggregations": { "load_time_outlier": { "values" : { - "1.0": 15, - "5.0": 20, - "25.0": 23, - "50.0": 25, - "75.0": 29, - "95.0": 60, - "99.0": 150 + "1.0": 9.9, + "5.0": 29.500000000000004, + "25.0": 167.5, + "50.0": 445.0, + "75.0": 722.5, + "95.0": 940.5, + "99.0": 980.1000000000001 } } } } -------------------------------------------------- +// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] As you can see, the aggregation will return a calculated value for each percentile in the default range. If we assume response times are in milliseconds, it is @@ -73,7 +78,9 @@ must be a value between 0-100 inclusive): [source,js] -------------------------------------------------- +GET latency/data/_search { + "size": 0, "aggs" : { "load_time_outlier" : { "percentiles" : { @@ -84,6 +91,8 @@ must be a value between 0-100 inclusive): } } -------------------------------------------------- +// CONSOLE +// TEST[setup:latency] <1> Use the `percents` parameter to specify particular percentiles to calculate ==== Keyed Response @@ -92,12 +101,13 @@ By default the `keyed` flag is set to `true` which associates a unique string ke [source,js] -------------------------------------------------- -POST bank/account/_search?size=0 +GET latency/data/_search { + "size": 0, "aggs": { - "balance_outlier": { + "load_time_outlier": { "percentiles": { - "field": "balance", + "field": "load_time", "keyed": false } } @@ -105,7 +115,7 @@ POST bank/account/_search?size=0 } -------------------------------------------------- // CONSOLE -// TEST[setup:bank] +// TEST[setup:latency] Response: @@ -115,35 +125,35 @@ Response: ... "aggregations": { - "balance_outlier": { + "load_time_outlier": { "values": [ { "key": 1.0, - "value": 1462.8400000000001 + "value": 9.9 }, { "key": 5.0, - "value": 3591.85 + "value": 29.500000000000004 }, { "key": 25.0, - "value": 13709.333333333334 + "value": 167.5 }, { "key": 50.0, - "value": 26020.11666666667 + "value": 445.0 }, { "key": 75.0, - "value": 38139.648148148146 + "value": 722.5 }, { "key": 95.0, - "value": 47551.549999999996 + "value": 940.5 }, { "key": 99.0, - "value": 49339.16 + "value": 980.1000000000001 } ] } @@ -151,13 +161,6 @@ Response: } -------------------------------------------------- // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] -// TESTRESPONSE[s/1462.8400000000001/$body.aggregations.balance_outlier.values.0.value/] -// TESTRESPONSE[s/3591.85/$body.aggregations.balance_outlier.values.1.value/] -// TESTRESPONSE[s/13709.333333333334/$body.aggregations.balance_outlier.values.2.value/] -// TESTRESPONSE[s/26020.11666666667/$body.aggregations.balance_outlier.values.3.value/] -// TESTRESPONSE[s/38139.648148148146/$body.aggregations.balance_outlier.values.4.value/] -// TESTRESPONSE[s/47551.549999999996/$body.aggregations.balance_outlier.values.5.value/] -// TESTRESPONSE[s/49339.16/$body.aggregations.balance_outlier.values.6.value/] ==== Script @@ -167,7 +170,9 @@ a script to convert them on-the-fly: [source,js] -------------------------------------------------- +GET latency/data/_search { + "size": 0, "aggs" : { "load_time_outlier" : { "percentiles" : { @@ -183,6 +188,9 @@ a script to convert them on-the-fly: } } -------------------------------------------------- +// CONSOLE +// TEST[setup:latency] + <1> The `field` parameter is replaced with a `script` parameter, which uses the script to generate values which percentiles are calculated on <2> Scripting supports parameterized input just like any other script @@ -191,14 +199,16 @@ This will interpret the `script` parameter as an `inline` script with the `painl [source,js] -------------------------------------------------- +GET latency/data/_search { + "size": 0, "aggs" : { "load_time_outlier" : { "percentiles" : { "script" : { "id": "my_script", - "params" : { - "timeUnit" : 1000 + "params": { + "field": "load_time" } } } @@ -206,6 +216,8 @@ This will interpret the `script` parameter as an `inline` script with the `painl } } -------------------------------------------------- +// CONSOLE +// TEST[setup:latency,stored_example_script] [[search-aggregations-metrics-percentile-aggregation-approximation]] ==== Percentiles are (usually) approximate @@ -252,7 +264,9 @@ This balance can be controlled using a `compression` parameter: [source,js] -------------------------------------------------- +GET latency/data/_search { + "size": 0, "aggs" : { "load_time_outlier" : { "percentiles" : { @@ -265,6 +279,9 @@ This balance can be controlled using a `compression` parameter: } } -------------------------------------------------- +// CONSOLE +// TEST[setup:latency] + <1> Compression controls memory usage and approximation error The TDigest algorithm uses a number of "nodes" to approximate percentiles -- the @@ -298,7 +315,9 @@ The HDR Histogram can be used by specifying the `method` parameter in the reques [source,js] -------------------------------------------------- +GET latency/data/_search { + "size": 0, "aggs" : { "load_time_outlier" : { "percentiles" : { @@ -312,6 +331,9 @@ The HDR Histogram can be used by specifying the `method` parameter in the reques } } -------------------------------------------------- +// CONSOLE +// TEST[setup:latency] + <1> `hdr` object indicates that HDR Histogram should be used to calculate the percentiles and specific settings for this algorithm can be specified inside the object <2> `number_of_significant_value_digits` specifies the resolution of values for the histogram in number of significant digits @@ -326,7 +348,9 @@ had a value. [source,js] -------------------------------------------------- +GET latency/data/_search { + "size": 0, "aggs" : { "grade_percentiles" : { "percentiles" : { @@ -337,5 +361,7 @@ had a value. } } -------------------------------------------------- +// CONSOLE +// TEST[setup:latency] <1> Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`. diff --git a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc index 78ee1f8eef0..58266ffa7db 100644 --- a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc @@ -24,17 +24,21 @@ Let's look at a range of percentiles representing load time: [source,js] -------------------------------------------------- +GET latency/data/_search { + "size": 0, "aggs" : { - "load_time_outlier" : { + "load_time_ranks" : { "percentile_ranks" : { "field" : "load_time", <1> - "values" : [15, 30] + "values" : [500, 600] } } } } -------------------------------------------------- +// CONSOLE +// TEST[setup:latency] <1> The field `load_time` must be a numeric field The response will look like this: @@ -45,15 +49,16 @@ The response will look like this: ... "aggregations": { - "load_time_outlier": { + "load_time_ranks": { "values" : { - "15": 92, - "30": 100 + "500.0": 55.00000000000001, + "600.0": 64.0 } } } } -------------------------------------------------- +// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] From this information you can determine you are hitting the 99% load time target but not quite hitting the 95% load time target @@ -64,13 +69,14 @@ By default the `keyed` flag is set to `true` associates a unique string key with [source,js] -------------------------------------------------- -POST bank/account/_search?size=0 +GET latency/data/_search { + "size": 0, "aggs": { - "balance_outlier": { + "load_time_ranks": { "percentile_ranks": { - "field": "balance", - "values": [25000, 50000], + "field": "load_time", + "values": [500, 600], "keyed": false } } @@ -78,7 +84,7 @@ POST bank/account/_search?size=0 } -------------------------------------------------- // CONSOLE -// TEST[setup:bank] +// TEST[setup:latency] Response: @@ -88,15 +94,15 @@ Response: ... "aggregations": { - "balance_outlier": { + "load_time_ranks": { "values": [ { - "key": 25000.0, - "value": 48.537724935732655 + "key": 500.0, + "value": 55.00000000000001 }, { - "key": 50000.0, - "value": 99.85567010309278 + "key": 600.0, + "value": 64.0 } ] } @@ -104,8 +110,7 @@ Response: } -------------------------------------------------- // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] -// TESTRESPONSE[s/48.537724935732655/$body.aggregations.balance_outlier.values.0.value/] -// TESTRESPONSE[s/99.85567010309278/$body.aggregations.balance_outlier.values.1.value/] + ==== Script @@ -115,11 +120,13 @@ a script to convert them on-the-fly: [source,js] -------------------------------------------------- +GET latency/data/_search { + "size": 0, "aggs" : { - "load_time_outlier" : { + "load_time_ranks" : { "percentile_ranks" : { - "values" : [3, 5], + "values" : [500, 600], "script" : { "lang": "painless", "source": "doc['load_time'].value / params.timeUnit", <1> @@ -132,6 +139,8 @@ a script to convert them on-the-fly: } } -------------------------------------------------- +// CONSOLE +// TEST[setup:latency] <1> The `field` parameter is replaced with a `script` parameter, which uses the script to generate values which percentile ranks are calculated on <2> Scripting supports parameterized input just like any other script @@ -140,15 +149,17 @@ This will interpret the `script` parameter as an `inline` script with the `painl [source,js] -------------------------------------------------- +GET latency/data/_search { + "size": 0, "aggs" : { - "load_time_outlier" : { + "load_time_ranks" : { "percentile_ranks" : { - "values" : [3, 5], + "values" : [500, 600], "script" : { "id": "my_script", - "params" : { - "timeUnit" : 1000 + "params": { + "field": "load_time" } } } @@ -156,6 +167,8 @@ This will interpret the `script` parameter as an `inline` script with the `painl } } -------------------------------------------------- +// CONSOLE +// TEST[setup:latency,stored_example_script] ==== HDR Histogram @@ -172,12 +185,14 @@ The HDR Histogram can be used by specifying the `method` parameter in the reques [source,js] -------------------------------------------------- +GET latency/data/_search { + "size": 0, "aggs" : { - "load_time_outlier" : { + "load_time_ranks" : { "percentile_ranks" : { "field" : "load_time", - "values" : [15, 30], + "values" : [500, 600], "hdr": { <1> "number_of_significant_value_digits" : 3 <2> } @@ -186,6 +201,8 @@ The HDR Histogram can be used by specifying the `method` parameter in the reques } } -------------------------------------------------- +// CONSOLE +// TEST[setup:latency] <1> `hdr` object indicates that HDR Histogram should be used to calculate the percentiles and specific settings for this algorithm can be specified inside the object <2> `number_of_significant_value_digits` specifies the resolution of values for the histogram in number of significant digits @@ -200,16 +217,20 @@ had a value. [source,js] -------------------------------------------------- +GET latency/data/_search { + "size": 0, "aggs" : { - "grade_ranks" : { + "load_time_ranks" : { "percentile_ranks" : { - "field" : "grade", + "field" : "load_time", + "values" : [500, 600], "missing": 10 <1> } } } } -------------------------------------------------- - -<1> Documents without a value in the `grade` field will fall into the same bucket as documents that have the value `10`. +// CONSOLE +// TEST[setup:latency] +<1> Documents without a value in the `load_time` field will fall into the same bucket as documents that have the value `10`.