diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index 8a2a4ec7386..f31f45ffa9a 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -15,35 +15,41 @@ more about the Painless scripting language in the * <> * <> +NOTE: While the context of the following examples is the {transform} use case, +the Painless scripts in the snippets below can be used in other {es} search +aggregations, too. + [discrete] [[painless-top-hits]] -==== Getting top hits by using scripted metric +==== Getting top hits by using scripted metric aggregation This snippet shows how to find the latest document, in other words the document with the latest timestamp. From a technical perspective, it helps to achieve the function of a <> by using -scripted metric aggregation which provides a metric output. +scripted metric aggregation in a {transform}, which provides a metric output. 
[source,js] -------------------------------------------------- -"latest_doc": { - "scripted_metric": { - "init_script": "state.timestamp_latest = 0L; state.last_doc = ''", <1> - "map_script": """ <2> - def current_date = doc['@timestamp'].getValue().toInstant().toEpochMilli(); - if (current_date > state.timestamp_latest) - {state.timestamp_latest = current_date; - state.last_doc = new HashMap(params['_source']);} - """, - "combine_script": "return state", <3> - "reduce_script": """ <4> - def last_doc = ''; - def timestamp_latest = 0L; - for (s in states) {if (s.timestamp_latest > (timestamp_latest)) - {timestamp_latest = s.timestamp_latest; last_doc = s.last_doc;}} - return last_doc - """ +"aggregations": { + "latest_doc": { + "scripted_metric": { + "init_script": "state.timestamp_latest = 0L; state.last_doc = ''", <1> + "map_script": """ <2> + def current_date = doc['@timestamp'].getValue().toInstant().toEpochMilli(); + if (current_date > state.timestamp_latest) + {state.timestamp_latest = current_date; + state.last_doc = new HashMap(params['_source']);} + """, + "combine_script": "return state", <3> + "reduce_script": """ <4> + def last_doc = ''; + def timestamp_latest = 0L; + for (s in states) {if (s.timestamp_latest > (timestamp_latest)) + {timestamp_latest = s.timestamp_latest; last_doc = s.last_doc;}} + return last_doc + """ + } } } -------------------------------------------------- @@ -70,23 +76,25 @@ You can retrieve the last value in a similar way: [source,js] -------------------------------------------------- -"latest_value": { - "scripted_metric": { - "init_script": "state.timestamp_latest = 0L; state.last_value = ''", - "map_script": """ - def current_date = doc['date'].getValue().toInstant().toEpochMilli(); - if (current_date > state.timestamp_latest) - {state.timestamp_latest = current_date; - state.last_value = params['_source']['value'];} - """, - "combine_script": "return state", - "reduce_script": """ - def last_value = ''; - def timestamp_latest 
= 0L; - for (s in states) {if (s.timestamp_latest > (timestamp_latest)) - {timestamp_latest = s.timestamp_latest; last_value = s.last_value;}} - return last_value - """ +"aggregations": { + "latest_value": { + "scripted_metric": { + "init_script": "state.timestamp_latest = 0L; state.last_value = ''", + "map_script": """ + def current_date = doc['date'].getValue().toInstant().toEpochMilli(); + if (current_date > state.timestamp_latest) + {state.timestamp_latest = current_date; + state.last_value = params['_source']['value'];} + """, + "combine_script": "return state", + "reduce_script": """ + def last_value = ''; + def timestamp_latest = 0L; + for (s in states) {if (s.timestamp_latest > (timestamp_latest)) + {timestamp_latest = s.timestamp_latest; last_value = s.last_value;}} + return last_value + """ + } } } -------------------------------------------------- @@ -97,31 +105,35 @@ You can retrieve the last value in a similar way: [[painless-time-features]] ==== Getting time features as scripted fields -This snippet shows how to extract time based features by using Painless. The -snippet uses an index where `@timestamp` is defined as a `date` type field. +This snippet shows how to extract time based features by using Painless in a +{transform}. The snippet uses an index where `@timestamp` is defined as a `date` +type field. 
[source,js] -------------------------------------------------- -"script_fields": { - "hour_of_day": { <1> - "script": { - "lang": "painless", - "source": """ - ZonedDateTime date = doc['@timestamp'].value; <2> - return date.getHour(); <3> - """ +"aggregations": { + "script_fields": { + "hour_of_day": { <1> + "script": { + "lang": "painless", + "source": """ + ZonedDateTime date = doc['@timestamp'].value; <2> + return date.getHour(); <3> + """ + } + }, + "month_of_year": { <4> + "script": { + "lang": "painless", + "source": """ + ZonedDateTime date = doc['@timestamp'].value; <5> + return date.getMonthValue(); <6> + """ + } } }, - "month_of_year": { <4> - "script": { - "lang": "painless", - "source": """ - ZonedDateTime date = doc['@timestamp'].value; <5> - return date.getMonthValue(); <6> - """ - } - } - } + ... +} -------------------------------------------------- // NOTCONSOLE @@ -327,3 +339,63 @@ the buckets you want to use for the variable. In this particular case, `min` and `max` are variables mapped to `time_frame.gte.value` and `time_frame.lte.value`. <3> Finally, the script subtracts the start date of the session from the end date which results in the duration of the session. + + +[discrete] +[[painless-count-http]] +==== Counting HTTP responses by using scripted metric aggregation + +You can count the different HTTP response types in a web log data set by using +scripted metric aggregation as part of the {transform}. The example below +assumes that the HTTP response codes are stored as keywords in the `response` +field of the documents. 
+ +[source,js] +-------------------------------------------------- +"aggregations": { <1> + "responses.counts": { <2> + "scripted_metric": { <3> + "init_script": "state.responses = ['error':0L,'success':0L,'other':0L]", <4> + "map_script": """ <5> + def code = doc['response.keyword'].value; + if (code.startsWith('5') || code.startsWith('4')) { + state.responses.error += 1 ; + } else if(code.startsWith('2')) { + state.responses.success += 1; + } else { + state.responses.other += 1; + } + """, + "combine_script": "state.responses", <6> + "reduce_script": """ <7> + def counts = ['error': 0L, 'success': 0L, 'other': 0L]; + for (responses in states) { + counts.error += responses['error']; + counts.success += responses['success']; + counts.other += responses['other']; + } + return counts; + """ + } + }, + ... +} +-------------------------------------------------- +// NOTCONSOLE + +<1> The `aggregations` object of the {transform} that contains all aggregations. +<2> Object of the `scripted_metric` aggregation. +<3> This `scripted_metric` performs a distributed operation on the web log data +to count specific types of HTTP responses (error, success, and other). +<4> The `init_script` creates a `responses` map in the `state` object with +three properties (`error`, `success`, `other`) with long data type. +<5> The `map_script` defines `code` based on the `response.keyword` value of the +document, then it counts the errors, successes, and other responses based on the +first digit of the responses. +<6> The `combine_script` returns `state.responses` from each shard. +<7> The `reduce_script` creates a `counts` map with the `error`, `success`, +and `other` properties, then iterates through the value of `responses` returned +by each shard and assigns the different response types to the appropriate +properties of the `counts` object; error responses to the error counts, success +responses to the success counts, and other responses to the other counts. 
+Finally, it returns the `counts` map with the response counts. \ No newline at end of file