From 0f3f22deb2422c5b66f833c4654ce8cb299804d7 Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Tue, 9 May 2017 08:12:59 -0700 Subject: [PATCH] [DOCS] Add ML info_content functions (elastic/x-pack-elasticsearch#1354) Original commit: elastic/x-pack-elasticsearch@5cfa9618b57781b6b55129bfad5670d5634093e4 --- docs/en/ml/functions/info.asciidoc | 115 +++++++++++++++++++++++++++-- 1 file changed, 108 insertions(+), 7 deletions(-) diff --git a/docs/en/ml/functions/info.asciidoc b/docs/en/ml/functions/info.asciidoc index 2c9cbac6b65..453b2a11b00 100644 --- a/docs/en/ml/functions/info.asciidoc +++ b/docs/en/ml/functions/info.asciidoc @@ -1,10 +1,6 @@ [[ml-info-functions]] === Information Content Functions -The {xpackml} features include the following information content functions: - -* `info_content`, `high_info_content`, `low_info_content` - The information content functions detect anomalies in the amount of information that is contained in strings within a bucket. These functions can be used as a more sophisticated method to identify incidences of data exfiltration or @@ -13,11 +9,116 @@ C2C activity, when analyzing the size in bytes of the data might not be sufficie If you want to monitor for unusually high amounts of information, use `high_info_content`. If want to look at drops in information content, use `low_info_content`. -//// +The {xpackml} features include the following information content functions: + +* <<ml-info-content,`info_content`>> +* <<ml-high-info-content,`high_info_content`>> +* <<ml-low-info-content,`low_info_content`>> + +[float] +[[ml-info-content]] +==== Info_content + +The `info_content` function detects anomalies in the amount of information that +is contained in strings in a bucket. + +This function supports the following properties: + +* `field_name` (required) +* `by_field_name` (optional) +* `over_field_name` (optional) +* `partition_field_name` (optional) +* `summary_count_field_name` (optional) + +For more information about those properties, +see <<ml-detectorconfig,Detector Configuration Objects>>. 
+ +For example, if you use the following function in a detector in your job, it +models information that is present in the `subdomain` string. It detects +anomalies where the information content is unusual compared to the other +`highest_registered_domain` values. An anomaly could indicate an abuse of the +DNS protocol, such as malicious command and control activity. [source,js] -------------------------------------------------- -{ "function" : "info_content", "fieldName" : "subdomain", "overFieldName" : "highest_registered_domain" } +{ + "function" : "info_content", + "field_name" : "subdomain", + "over_field_name" : "highest_registered_domain" +} -------------------------------------------------- -//// +NOTE: Both high and low values are considered anomalous. In many use cases, the +`high_info_content` function is a more appropriate choice. + +[float] +[[ml-high-info-content]] +==== High_info_content + +The `high_info_content` function detects anomalies in the amount of information +that is contained in strings in a bucket. Use this function if you want to +monitor for unusually high amounts of information. + +This function supports the following properties: + +* `field_name` (required) +* `by_field_name` (optional) +* `over_field_name` (optional) +* `partition_field_name` (optional) +* `summary_count_field_name` (optional) + +For more information about those properties, +see <<ml-detectorconfig,Detector Configuration Objects>>. + +For example, if you use the following function in a detector in your job, it +models information content that is held in the DNS query string. It detects +`src_ip` values where the information content is unusually high compared to +other `src_ip` values. This example is similar to the example for the +`info_content` function, but it reports anomalies only where the amount of +information content is higher than expected. +//TBD: Still pertinent? 
"This configuration identifies activity typical of DGA malware." + +[source,js] +-------------------------------------------------- +{ + "function" : "high_info_content", + "field_name" : "query", + "over_field_name" : "src_ip" +} +-------------------------------------------------- + +[float] +[[ml-low-info-content]] +==== Low_info_content + +The `low_info_content` function detects anomalies in the amount of information +that is contained in strings in a bucket. Use this function if you want to look +at drops in information content. + +This function supports the following properties: + +* `field_name` (required) +* `by_field_name` (optional) +* `over_field_name` (optional) +* `partition_field_name` (optional) +* `summary_count_field_name` (optional) + +For more information about those properties, +see <<ml-detectorconfig,Detector Configuration Objects>>. + +For example, if you use the following function in a detector in your job, it +models information content that is present in the message string for each +`logfilename`. It detects anomalies where the information content is low compared +to its past behavior. For example, this function detects unusually low amounts +of information in a collection of rolling log files. Low information might +indicate that a process has entered an infinite loop or that logging features +have been disabled. + +[source,js] +-------------------------------------------------- +{ + "function" : "low_info_content", + "field_name" : "message", + "by_field_name" : "logfilename" +} +--------------------------------------------------