Merge pull request #179 from opensearch-project/bucket-level-alerting
Modified monitoring to have two kinds of monitors
This commit is contained in:
commit
99e545b92b
|
@ -19,10 +19,12 @@ Use the alerting API to programmatically manage monitors and alerts.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Create monitor
|
## Create query-level monitor
|
||||||
Introduced 1.0
|
Introduced 1.0
|
||||||
{: .label .label-purple }
|
{: .label .label-purple }
|
||||||
|
|
||||||
|
Query-level monitors run the query and check whether the results should trigger any alerts. As such, query-level monitors can only trigger one alert at a time. For more information about query-level monitors versus bucket-level monitors, see [Create monitors]({{site.url}}{{site.baseurl}}/monitoring-plugins/alerting/monitors/#create-monitors).
|
||||||
|
|
||||||
#### Request
|
#### Request
|
||||||
|
|
||||||
```json
|
```json
|
||||||
|
@ -30,6 +32,7 @@ POST _plugins/_alerting/monitors
|
||||||
{
|
{
|
||||||
"type": "monitor",
|
"type": "monitor",
|
||||||
"name": "test-monitor",
|
"name": "test-monitor",
|
||||||
|
"monitor_type": "query_level_monitor",
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"schedule": {
|
"schedule": {
|
||||||
"period": {
|
"period": {
|
||||||
|
@ -166,7 +169,7 @@ If you use a custom webhook for your destination and need to embed JSON in the m
|
||||||
},
|
},
|
||||||
"throttle_enabled": false,
|
"throttle_enabled": false,
|
||||||
"subject_template": {
|
"subject_template": {
|
||||||
"source": "TheSubject",
|
"source": "Subject",
|
||||||
"lang": "mustache"
|
"lang": "mustache"
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
@ -186,6 +189,7 @@ The following example creates a monitor that runs at 12:10 PM Pacific Time on th
|
||||||
{
|
{
|
||||||
"type": "monitor",
|
"type": "monitor",
|
||||||
"name": "test-monitor",
|
"name": "test-monitor",
|
||||||
|
"monitor_type": "query_level_monitor",
|
||||||
"enabled": true,
|
"enabled": true,
|
||||||
"schedule": {
|
"schedule": {
|
||||||
"cron" : {
|
"cron" : {
|
||||||
|
@ -228,7 +232,7 @@ The following example creates a monitor that runs at 12:10 PM Pacific Time on th
|
||||||
"name": "test-action",
|
"name": "test-action",
|
||||||
"destination_id": "ld7912sBlQ5JUWWFThoW",
|
"destination_id": "ld7912sBlQ5JUWWFThoW",
|
||||||
"message_template": {
|
"message_template": {
|
||||||
"source": "This is my message body."
|
"source": "This is a message body."
|
||||||
},
|
},
|
||||||
"throttle_enabled": true,
|
"throttle_enabled": true,
|
||||||
"throttle": {
|
"throttle": {
|
||||||
|
@ -236,7 +240,7 @@ The following example creates a monitor that runs at 12:10 PM Pacific Time on th
|
||||||
"unit": "MINUTES"
|
"unit": "MINUTES"
|
||||||
},
|
},
|
||||||
"subject_template": {
|
"subject_template": {
|
||||||
"source": "TheSubject"
|
"source": "Subject"
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
}]
|
}]
|
||||||
|
@ -247,6 +251,265 @@ For a full list of timezone names, refer to [Wikipedia](https://en.wikipedia.org
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Create bucket-level monitor
|
||||||
|
|
||||||
|
Bucket-level monitors categorize results into buckets separated by fields. The monitor then runs your script with each bucket's results and evaluates whether to trigger an alert. For more information about bucket-level monitors versus query-level monitors, see [Create monitors]({{site.url}}{{site.baseurl}}/monitoring-plugins/alerting/monitors/#create-monitors).
|
||||||
|
|
||||||
|
```json
|
||||||
|
POST _plugins/_alerting/monitors
|
||||||
|
{
|
||||||
|
"type": "monitor",
|
||||||
|
"name": "test-bucket-level-monitor",
|
||||||
|
"monitor_type": "bucket_level_monitor",
|
||||||
|
"enabled": true,
|
||||||
|
"schedule": {
|
||||||
|
"period": {
|
||||||
|
"interval": 1,
|
||||||
|
"unit": "MINUTES"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"search": {
|
||||||
|
"indices": [
|
||||||
|
"movies"
|
||||||
|
],
|
||||||
|
"query": {
|
||||||
|
"size": 0,
|
||||||
|
"query": {
|
||||||
|
"bool": {
|
||||||
|
"filter": [
|
||||||
|
{
|
||||||
|
"range": {
|
||||||
|
"order_date": {
|
||||||
|
"from": "{{period_end}}||-1h",
|
||||||
|
"to": "{{period_end}}",
|
||||||
|
"include_lower": true,
|
||||||
|
"include_upper": true,
|
||||||
|
"format": "epoch_millis"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggregations": {
|
||||||
|
"composite_agg": {
|
||||||
|
"composite": {
|
||||||
|
"sources": [
|
||||||
|
{
|
||||||
|
"user": {
|
||||||
|
"terms": {
|
||||||
|
"field": "user"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"aggregations": {
|
||||||
|
"avg_products_base_price": {
|
||||||
|
"avg": {
|
||||||
|
"field": "products.base_price"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"triggers": [
|
||||||
|
{
|
||||||
|
"bucket_level_trigger": {
|
||||||
|
"name": "test-trigger",
|
||||||
|
"severity": "1",
|
||||||
|
"condition": {
|
||||||
|
"buckets_path": {
|
||||||
|
"_count": "_count",
|
||||||
|
"avg_products_base_price": "avg_products_base_price"
|
||||||
|
},
|
||||||
|
"parent_bucket_path": "composite_agg",
|
||||||
|
"script": {
|
||||||
|
"source": "params._count > 50 || params.avg_products_base_price < 35",
|
||||||
|
"lang": "painless"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"actions": [
|
||||||
|
{
|
||||||
|
"name": "test-action",
|
||||||
|
"destination_id": "E4o5hnsB6KjPKmHtpfCA",
|
||||||
|
"message_template": {
|
||||||
|
"source": """Monitor {{ctx.monitor.name}} just entered alert status. Please investigate the issue. - Trigger: {{ctx.trigger.name}} - Severity: {{ctx.trigger.severity}} - Period start: {{ctx.periodStart}} - Period end: {{ctx.periodEnd}} - Deduped Alerts: {{ctx.dedupedAlerts}} * {{id}} : {{bucket_keys}} {{ctx.dedupedAlerts}} - New Alerts: {{ctx.newAlerts}} * {{id}} : {{bucket_keys}} {{ctx.newAlerts}} - Completed Alerts: {{ctx.completedAlerts}} * {{id}} : {{bucket_keys}} {{ctx.completedAlerts}}""",
|
||||||
|
"lang": "mustache"
|
||||||
|
},
|
||||||
|
"throttle_enabled": false,
|
||||||
|
"throttle": {
|
||||||
|
"value": 10,
|
||||||
|
"unit": "MINUTES"
|
||||||
|
},
|
||||||
|
"action_execution_policy": {
|
||||||
|
"action_execution_scope": {
|
||||||
|
"per_alert": {
|
||||||
|
"actionable_alerts": [
|
||||||
|
"DEDUPED",
|
||||||
|
"NEW"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"subject_template": {
|
||||||
|
"source": "The Subject",
|
||||||
|
"lang": "mustache"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Sample response
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"_id" : "Dfxr63sBwex6DxEhHV5N",
|
||||||
|
"_version" : 1,
|
||||||
|
"_seq_no" : 3,
|
||||||
|
"_primary_term" : 1,
|
||||||
|
"monitor" : {
|
||||||
|
"type" : "monitor",
|
||||||
|
"schema_version" : 4,
|
||||||
|
"name" : "test-bucket-level-monitor",
|
||||||
|
"monitor_type" : "bucket_level_monitor",
|
||||||
|
"user" : {
|
||||||
|
"name" : "",
|
||||||
|
"backend_roles" : [ ],
|
||||||
|
"roles" : [ ],
|
||||||
|
"custom_attribute_names" : [ ],
|
||||||
|
"user_requested_tenant" : null
|
||||||
|
},
|
||||||
|
"enabled" : true,
|
||||||
|
"enabled_time" : 1631742270785,
|
||||||
|
"schedule" : {
|
||||||
|
"period" : {
|
||||||
|
"interval" : 1,
|
||||||
|
"unit" : "MINUTES"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"inputs" : [
|
||||||
|
{
|
||||||
|
"search" : {
|
||||||
|
"indices" : [
|
||||||
|
"opensearch_dashboards_sample_data_flights"
|
||||||
|
],
|
||||||
|
"query" : {
|
||||||
|
"size" : 0,
|
||||||
|
"query" : {
|
||||||
|
"bool" : {
|
||||||
|
"filter" : [
|
||||||
|
{
|
||||||
|
"range" : {
|
||||||
|
"order_date" : {
|
||||||
|
"from" : "{{period_end}}||-1h",
|
||||||
|
"to" : "{{period_end}}",
|
||||||
|
"include_lower" : true,
|
||||||
|
"include_upper" : true,
|
||||||
|
"format" : "epoch_millis",
|
||||||
|
"boost" : 1.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"adjust_pure_negative" : true,
|
||||||
|
"boost" : 1.0
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"aggregations" : {
|
||||||
|
"composite_agg" : {
|
||||||
|
"composite" : {
|
||||||
|
"size" : 10,
|
||||||
|
"sources" : [
|
||||||
|
{
|
||||||
|
"user" : {
|
||||||
|
"terms" : {
|
||||||
|
"field" : "user",
|
||||||
|
"missing_bucket" : false,
|
||||||
|
"order" : "asc"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"aggregations" : {
|
||||||
|
"avg_products_base_price" : {
|
||||||
|
"avg" : {
|
||||||
|
"field" : "products.base_price"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"triggers" : [
|
||||||
|
{
|
||||||
|
"bucket_level_trigger" : {
|
||||||
|
"id" : "C_xr63sBwex6DxEhHV5B",
|
||||||
|
"name" : "test-trigger",
|
||||||
|
"severity" : "1",
|
||||||
|
"condition" : {
|
||||||
|
"buckets_path" : {
|
||||||
|
"_count" : "_count",
|
||||||
|
"avg_products_base_price" : "avg_products_base_price"
|
||||||
|
},
|
||||||
|
"parent_bucket_path" : "composite_agg",
|
||||||
|
"script" : {
|
||||||
|
"source" : "params._count > 50 || params.avg_products_base_price < 35",
|
||||||
|
"lang" : "painless"
|
||||||
|
},
|
||||||
|
"gap_policy" : "skip"
|
||||||
|
},
|
||||||
|
"actions" : [
|
||||||
|
{
|
||||||
|
"id" : "DPxr63sBwex6DxEhHV5B",
|
||||||
|
"name" : "test-action",
|
||||||
|
"destination_id" : "E4o5hnsB6KjPKmHtpfCA",
|
||||||
|
"message_template" : {
|
||||||
|
"source" : "Monitor {{ctx.monitor.name}} just entered alert status. Please investigate the issue. - Trigger: {{ctx.trigger.name}} - Severity: {{ctx.trigger.severity}} - Period start: {{ctx.periodStart}} - Period end: {{ctx.periodEnd}} - Deduped Alerts: {{ctx.dedupedAlerts}} * {{id}} : {{bucket_keys}} {{ctx.dedupedAlerts}} - New Alerts: {{ctx.newAlerts}} * {{id}} : {{bucket_keys}} {{ctx.newAlerts}} - Completed Alerts: {{ctx.completedAlerts}} * {{id}} : {{bucket_keys}} {{ctx.completedAlerts}}",
|
||||||
|
"lang" : "mustache"
|
||||||
|
},
|
||||||
|
"throttle_enabled" : false,
|
||||||
|
"subject_template" : {
|
||||||
|
"source" : "The Subject",
|
||||||
|
"lang" : "mustache"
|
||||||
|
},
|
||||||
|
"throttle" : {
|
||||||
|
"value" : 10,
|
||||||
|
"unit" : "MINUTES"
|
||||||
|
},
|
||||||
|
"action_execution_policy" : {
|
||||||
|
"action_execution_scope" : {
|
||||||
|
"per_alert" : {
|
||||||
|
"actionable_alerts" : [
|
||||||
|
"DEDUPED",
|
||||||
|
"NEW"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"last_update_time" : 1631742270785
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
## Update monitor
|
## Update monitor
|
||||||
Introduced 1.0
|
Introduced 1.0
|
||||||
{: .label .label-purple }
|
{: .label .label-purple }
|
||||||
|
|
|
@ -19,11 +19,11 @@ has_children: false
|
||||||
|
|
||||||
Term | Definition
|
Term | Definition
|
||||||
:--- | :---
|
:--- | :---
|
||||||
Monitor | A job that runs on a defined schedule and queries OpenSearch. The results of these queries are then used as input for one or more *triggers*.
|
Monitor | A job that runs on a defined schedule and queries OpenSearch indices. The results of these queries are then used as input for one or more *triggers*.
|
||||||
Trigger | Conditions that, if met, generate *alerts*.
|
Trigger | Conditions that, if met, generate *alerts*.
|
||||||
Alert | An event associated with a trigger. When an alert is created, the trigger performs *actions*, which can include sending a notification.
|
Alert | An event associated with a trigger. When an alert is created, the trigger performs *actions*, which can include sending a notification.
|
||||||
Action | The information that you want the monitor to send out after being triggered. Actions have a *destination*, a message subject, and a message body.
|
Action | The information that you want the monitor to send out after being triggered. Actions have a *destination*, a message subject, and a message body.
|
||||||
Destination | A reusable location for an action, such as Amazon Chime, Slack, or a webhook URL.
|
Destination | A reusable location for an action. Supported locations are Amazon Chime, Email, Slack, or custom webhook.
|
||||||
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
@ -34,9 +34,9 @@ Destination | A reusable location for an action, such as Amazon Chime, Slack, or
|
||||||
1. Specify a name for the destination so that you can identify it later.
|
1. Specify a name for the destination so that you can identify it later.
|
||||||
1. For **Type**, choose Slack, Amazon Chime, custom webhook, or [email](#email-as-a-destination).
|
1. For **Type**, choose Slack, Amazon Chime, custom webhook, or [email](#email-as-a-destination).
|
||||||
|
|
||||||
For Email type, refer to [Email as a destination](#email-as-a-destination) section below. For all other types, specify the webhook URL. For more information about webhooks, see the documentation for [Slack](https://api.slack.com/incoming-webhooks) and [Amazon Chime](https://docs.aws.amazon.com/chime/latest/ug/webhooks.html).
|
For Email, refer to the [Email as a destination](#email-as-a-destination) section below. For all other types, specify the webhook URL. See the documentation for [Slack](https://api.slack.com/incoming-webhooks) and [Amazon Chime](https://docs.aws.amazon.com/chime/latest/ug/webhooks.html) to learn more about webhooks.
|
||||||
|
|
||||||
For custom webhooks, you must specify more information: parameters and headers. For example, if your endpoint requires basic authentication, you might need to add a header with a key of `Authorization` and a value of `Basic <Base64-encoded-credential-string>`. You might also need to change `Content-Type` to whatever your webhook requires. Popular values are `application/json`, `application/xml`, and `text/plain`.
|
If you're using custom webhooks, you must specify more information: parameters and headers. For example, if your endpoint requires basic authentication, you might need to add a header with a key of `Authorization` and a value of `Basic <Base64-encoded-credential-string>`. You might also need to change `Content-Type` to whatever your webhook requires. Popular values are `application/json`, `application/xml`, and `text/plain`.
|
||||||
|
|
||||||
This information is stored in plain text in the OpenSearch cluster. We will improve this design in the future, but for now, the encoded credentials (which are neither encrypted nor hashed) might be visible to other OpenSearch users.
|
This information is stored in plain text in the OpenSearch cluster. We will improve this design in the future, but for now, the encoded credentials (which are neither encrypted nor hashed) might be visible to other OpenSearch users.
|
||||||
|
|
||||||
|
@ -55,7 +55,7 @@ To configure a sender email, do the following:
|
||||||
1. After you choose **Email** as the destination type, choose **Manage senders**.
|
1. After you choose **Email** as the destination type, choose **Manage senders**.
|
||||||
1. Choose **Add sender**, **New sender** and enter a unique name.
|
1. Choose **Add sender**, **New sender** and enter a unique name.
|
||||||
1. Enter the email address, SMTP host (e.g. `smtp.gmail.com` for a Gmail account), and the port.
|
1. Enter the email address, SMTP host (e.g. `smtp.gmail.com` for a Gmail account), and the port.
|
||||||
1. Choose an encryption method, or use the default value of **None**. However, most email providers require SSL or TLS, which requires a username and password in OpenSearch keystore. Refer to [Authenticate sender account](#authenticate-sender-account) to learn more.
|
1. Choose an encryption method, or use the default value of **None**. However, most email providers require SSL or TLS, which require a username and password in OpenSearch keystore. Refer to [Authenticate sender account](#authenticate-sender-account) to learn more.
|
||||||
1. Choose **Save** to save the configuration and create the sender. You can create a sender even before you add your credentials to the OpenSearch keystore. However, you must [authenticate each sender account](#authenticate-sender-account) before you use the destination to send your alert.
|
1. Choose **Save** to save the configuration and create the sender. You can create a sender even before you add your credentials to the OpenSearch keystore. However, you must [authenticate each sender account](#authenticate-sender-account) before you use the destination to send your alert.
|
||||||
|
|
||||||
You can reuse senders across many different destinations, but each destination only supports one sender.
|
You can reuse senders across many different destinations, but each destination only supports one sender.
|
||||||
|
@ -82,7 +82,7 @@ If your email provider requires SSL or TLS, you must authenticate each sender ac
|
||||||
./bin/opensearch-keystore add plugins.alerting.destination.email.<sender_name>.password
|
./bin/opensearch-keystore add plugins.alerting.destination.email.<sender_name>.password
|
||||||
```
|
```
|
||||||
|
|
||||||
**Note**: Keystore settings are node-specific. You must run these commands on each node.
|
Note: Keystore settings are node-specific. You must run these commands on each node.
|
||||||
{: .note}
|
{: .note}
|
||||||
|
|
||||||
To change or update your credentials (after you've added them to the keystore on every node), call the reload API to automatically update those credentials without restarting OpenSearch:
|
To change or update your credentials (after you've added them to the keystore on every node), call the reload API to automatically update those credentials without restarting OpenSearch:
|
||||||
|
@ -101,20 +101,9 @@ POST _nodes/reload_secure_settings
|
||||||
|
|
||||||
1. Choose **Alerting**, **Monitors**, **Create monitor**.
|
1. Choose **Alerting**, **Monitors**, **Create monitor**.
|
||||||
1. Specify a name for the monitor.
|
1. Specify a name for the monitor.
|
||||||
|
1. Choose either **Per query monitor** or **Per bucket monitor**.
|
||||||
|
|
||||||
The anomaly detection option is for pairing with the anomaly detection plugin. See [Anomaly Detection]({{site.url}}{{site.baseurl}}/monitoring-plugins/ad/).
|
Whereas query-level monitors run your specified query and then check whether the query's results triggers any alerts, bucket-level monitors let you select fields to create buckets and categorize your results into those buckets. The alerting plugin runs each bucket's unique results against a script you define later, so you have finer control over which results should trigger alerts. Each of those buckets can trigger an alert, but query-level monitors can only trigger one alert at a time.
|
||||||
For anomaly detector, choose an appropriate schedule for the monitor based on the detector interval. Otherwise, the alerting monitor might miss reading the results.
|
|
||||||
|
|
||||||
For example, assume you set the monitor interval and the detector interval as 5 minutes, and you start the detector at 12:00. If an anomaly is detected at 12:05, it might be available at 12:06 because of the delay between writing the anomaly and it being available for queries. The monitor reads the anomaly results between 12:00 and 12:05, so it does not get the anomaly results available at 12:06.
|
|
||||||
|
|
||||||
To avoid this issue, make sure the alerting monitor is at least twice the detector interval.
|
|
||||||
When you create a monitor using OpenSearch Dashboards, the anomaly detector plugin generates a default monitor schedule that's twice the detector interval.
|
|
||||||
|
|
||||||
Whenever you update a detector’s interval, make sure to update the associated monitor interval as well, as the anomaly detection plugin does not do this automatically.
|
|
||||||
|
|
||||||
1. Choose one or more indices. You can also use `*` as a wildcard to specify an index pattern.
|
|
||||||
|
|
||||||
If you use the security plugin, you can only choose indices that you have permission to access. For details, see [Alerting security]({{site.url}}{{site.baseurl}}/security-plugin/).
|
|
||||||
|
|
||||||
1. Define the monitor in one of three ways: visually, using a query, or using an anomaly detector.
|
1. Define the monitor in one of three ways: visually, using a query, or using an anomaly detector.
|
||||||
|
|
||||||
|
@ -167,39 +156,57 @@ Whenever you update a detector’s interval, make sure to update the associated
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
"Start" and "end" refer to the interval at which the monitor runs. See [Available variables](#available-variables).
|
"Start" and "end" refer to the interval at which the monitor runs. See [Available variables](#available-variables).
|
||||||
|
|
||||||
|
To define a monitor visually, choose **Visual editor**. Then choose a source index, a timeframe, an aggregation (for example, `count()` or `average()`), a data filter if you want to monitor a subset of your source index, and a group-by field if you want to include an aggregation field in your query. At least one group-by field is required if you're defining a bucket-level monitor. Visual definition works well for most monitors.
|
||||||
|
|
||||||
1. To define a monitor visually, choose **Define using visual graph**. Then choose an aggregation (for example, `count()` or `average()`), a set of documents, and a timeframe. Visual definition works well for most monitors.
|
If you use the security plugin, you can only choose indices that you have permission to access. For details, see [Alerting security]({{site.url}}{{site.baseurl}}/security-plugin/).
|
||||||
|
|
||||||
To use a query, choose **Define using extraction query**, add your query (using [the OpenSearch query DSL]({{site.url}}{{site.baseurl}}/opensearch/query-dsl/full-text/)), and test it using the **Run** button.
|
To use a query, choose **Extraction query editor**, add your query (using [the OpenSearch query DSL]({{site.url}}{{site.baseurl}}/opensearch/query-dsl/full-text/)), and test it using the **Run** button.
|
||||||
|
|
||||||
The monitor makes this query to OpenSearch as often as the schedule dictates; check the **Query Performance** section and make sure you're comfortable with the performance implications.
|
The monitor makes this query to OpenSearch as often as the schedule dictates; check the **Query Performance** section and make sure you're comfortable with the performance implications.
|
||||||
|
|
||||||
|
To use an anomaly detector, choose **Anomaly detector** and select your **Detector**.
|
||||||
|
|
||||||
|
The anomaly detection option is for pairing with the anomaly detection plugin. See [Anomaly Detection]({{site.url}}{{site.baseurl}}/monitoring-plugins/ad/).
|
||||||
|
For anomaly detector, choose an appropriate schedule for the monitor based on the detector interval. Otherwise, the alerting monitor might miss reading the results.
|
||||||
|
|
||||||
|
For example, assume you set the monitor interval and the detector interval as 5 minutes, and you start the detector at 12:00. If an anomaly is detected at 12:05, it might be available at 12:06 because of the delay between writing the anomaly and it being available for queries. The monitor reads the anomaly results between 12:00 and 12:05, so it does not get the anomaly results available at 12:06.
|
||||||
|
|
||||||
|
To avoid this issue, make sure the alerting monitor is at least twice the detector interval.
|
||||||
|
When you create a monitor using OpenSearch Dashboards, the anomaly detector plugin generates a default monitor schedule that's twice the detector interval.
|
||||||
|
|
||||||
|
Whenever you update a detector’s interval, make sure to update the associated monitor interval as well, as the anomaly detection plugin does not do this automatically.
|
||||||
|
|
||||||
|
**Note**: Anomaly detection is available only if you are defining a per query monitor.
|
||||||
|
{: .note}
|
||||||
|
|
||||||
To use an anomaly detector, choose **Define using Anomaly detector** and select your **Detector**.
|
|
||||||
1. Choose a frequency and timezone for your monitor. Note that you can only pick a timezone if you choose Daily, Weekly, Monthly, or [custom cron expression]({{site.url}}{{site.baseurl}}/monitoring-plugins/alerting/cron/) for frequency.
|
1. Choose a frequency and timezone for your monitor. Note that you can only pick a timezone if you choose Daily, Weekly, Monthly, or [custom cron expression]({{site.url}}{{site.baseurl}}/monitoring-plugins/alerting/cron/) for frequency.
|
||||||
1. Choose **Create**.
|
|
||||||
|
|
||||||
|
1. Add a trigger to your monitor.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Create triggers
|
## Create triggers
|
||||||
|
|
||||||
The next step in creating a monitor is to create a trigger. These steps differ depending on whether you chose **Define using visual graph** or **Define using extraction query** or **Define using Anomaly detector** when you created the monitor.
|
Steps to create a trigger differ depending on whether you chose **Visual editor**, **Extraction query editor**, or **Anomaly detector** when you created the monitor.
|
||||||
|
|
||||||
Either way, you begin by specifying a name and severity level for the trigger. Severity levels help you manage alerts. A trigger with a high severity level (e.g. 1) might page a specific individual, whereas a trigger with a low severity level might message a chat room.
|
You begin by specifying a name and severity level for the trigger. Severity levels help you manage alerts. A trigger with a high severity level (e.g. 1) might page a specific individual, whereas a trigger with a low severity level might message a chat room.
|
||||||
|
|
||||||
|
Remember that query-level monitors run your trigger's script just once against the query's results, but bucket-level monitors execute your trigger's script on each bucket, so you should create a trigger that best fits the monitor you chose. If you want to execute multiple scripts, you must create multiple triggers.
|
||||||
|
|
||||||
### Visual graph
|
### Visual editor
|
||||||
|
|
||||||
For **Trigger condition**, specify a threshold for the aggregation and timeframe you chose earlier, such as "is below 1,000" or "is exactly 10."
|
For a query-level monitor's **Trigger condition**, specify a threshold for the aggregation and timeframe you chose earlier, such as "is below 1,000" or "is exactly 10."
|
||||||
|
|
||||||
The line moves up and down as you increase and decrease the threshold. Once this line is crossed, the trigger evaluates to true.
|
The line moves up and down as you increase and decrease the threshold. Once this line is crossed, the trigger evaluates to true.
|
||||||
|
|
||||||
|
Bucket-level monitors also require you to specify a threshold and value for your aggregation and timeframe, but you can use a maximum of five conditions to better refine your trigger. Optionally, you can also use a keyword filter to filter for a specific field in your index.
|
||||||
|
|
||||||
|
|
||||||
### Extraction query
|
### Extraction query
|
||||||
|
|
||||||
For **Trigger condition**, specify a Painless script that returns true or false. Painless is the default OpenSearch scripting language and has a syntax similar to Groovy.
|
If you're using a query-level monitor, specify a Painless script that returns true or false. Painless is the default OpenSearch scripting language and has a syntax similar to Groovy.
|
||||||
|
|
||||||
Trigger condition scripts revolve around the `ctx.results[0]` variable, which corresponds to the extraction query response. For example, your script might reference `ctx.results[0].hits.total.value` or `ctx.results[0].hits.hits[i]._source.error_code`.
|
Trigger condition scripts revolve around the `ctx.results[0]` variable, which corresponds to the extraction query response. For example, your script might reference `ctx.results[0].hits.total.value` or `ctx.results[0].hits.hits[i]._source.error_code`.
|
||||||
|
|
||||||
|
@ -208,6 +215,27 @@ A return value of true means the trigger condition has been met, and the trigger
|
||||||
The **Info** link next to **Trigger condition** contains a useful summary of the variables and results available to your query.
|
The **Info** link next to **Trigger condition** contains a useful summary of the variables and results available to your query.
|
||||||
{: .tip }
|
{: .tip }
|
||||||
|
|
||||||
|
Bucket-level monitors require you to specify more information in your trigger condition. At a minimum, you must have the following fields:
|
||||||
|
|
||||||
|
- `buckets_path`, which maps variable names to metrics to use in your script.
|
||||||
|
- `parent_bucket_path`, which is a path to a multi-bucket aggregation. The path can include single-bucket aggregations, but the last aggregation must be multi-bucket. For example, if you have a pipeline such as `agg1>agg2>agg3`, `agg1` and `agg2` are single-bucket aggregations, but `agg3` must be a multi-bucket aggregation.
|
||||||
|
- `script`, which is the script that OpenSearch runs to evaluate whether to trigger any alerts.
|
||||||
|
|
||||||
|
For example, you might have a script that looks like the following:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"buckets_path": {
|
||||||
|
"count_var": "_count"
|
||||||
|
},
|
||||||
|
"parent_bucket_path": "composite_agg",
|
||||||
|
"script": {
|
||||||
|
"source": "params.count_var > 5"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
After mapping the `count_var` variable to the `_count` metric, you can use `count_var` in your script and reference `_count` data. Finally, `composite_agg` is a path to a multi-bucket aggregation.
|
||||||
|
|
||||||
### Anomaly detector
|
### Anomaly detector
|
||||||
|
|
||||||
|
@ -264,11 +292,11 @@ Below are some variables you can include in your message using Mustache template
|
||||||
|
|
||||||
Variable | Data Type | Description
|
Variable | Data Type | Description
|
||||||
:--- | :--- | :---
|
:--- | :--- | :---
|
||||||
`ctx.monitor` | JSON | Includes `ctx.monitor.name`, `ctx.monitor.type`, `ctx.monitor.enabled`, `ctx.monitor.enabled_time`, `ctx.monitor.schedule`, `ctx.monitor.inputs`, `triggers` and `ctx.monitor.last_update_time`.
|
`ctx.monitor` | Object | Includes `ctx.monitor.name`, `ctx.monitor.type`, `ctx.monitor.enabled`, `ctx.monitor.enabled_time`, `ctx.monitor.schedule`, `ctx.monitor.inputs`, `triggers` and `ctx.monitor.last_update_time`.
|
||||||
`ctx.monitor.user` | JSON | Includes information about the user who created the monitor. Includes `ctx.monitor.user.backend_roles` and `ctx.monitor.user.roles`, which are arrays that contain the backend roles and roles assigned to the user. See [alerting security]({{site.url}}{{site.baseurl}}/monitoring-plugins/alerting/security/) for more information.
|
`ctx.monitor.user` | Object | Includes information about the user who created the monitor. Includes `ctx.monitor.user.backend_roles` and `ctx.monitor.user.roles`, which are arrays that contain the backend roles and roles assigned to the user. See [alerting security]({{site.url}}{{site.baseurl}}/monitoring-plugins/alerting/security/) for more information.
|
||||||
`ctx.monitor.enabled` | Boolean | Whether the monitor is enabled.
|
`ctx.monitor.enabled` | Boolean | Whether the monitor is enabled.
|
||||||
`ctx.monitor.enabled_time` | Milliseconds | Unix epoch time of when the monitor was last enabled.
|
`ctx.monitor.enabled_time` | Milliseconds | Unix epoch time of when the monitor was last enabled.
|
||||||
`ctx.monitor.schedule` | JSON | Contains a schedule of how often or when the monitor should run.
|
`ctx.monitor.schedule` | Object | Contains a schedule of how often or when the monitor should run.
|
||||||
`ctx.monitor.schedule.period.interval` | Integer | The interval at which the monitor runs.
|
`ctx.monitor.schedule.period.interval` | Integer | The interval at which the monitor runs.
|
||||||
`ctx.monitor.schedule.period.unit` | String | The interval's unit of time.
|
`ctx.monitor.schedule.period.unit` | String | The interval's unit of time.
|
||||||
`ctx.monitor.inputs` | Array | An array that contains the indices and definition used to create the monitor.
|
`ctx.monitor.inputs` | Array | An array that contains the indices and definition used to create the monitor.
|
||||||
|
@ -282,7 +310,7 @@ Variable | Data Type | Description
|
||||||
`ctx.trigger.id` | String | The trigger's ID.
|
`ctx.trigger.id` | String | The trigger's ID.
|
||||||
`ctx.trigger.name` | String | The trigger's name.
|
`ctx.trigger.name` | String | The trigger's name.
|
||||||
`ctx.trigger.severity` | String | The trigger's severity.
|
`ctx.trigger.severity` | String | The trigger's severity.
|
||||||
`ctx.trigger.condition`| JSON | Contains the Painless script used when creating the monitor.
|
`ctx.trigger.condition`| Object | Contains the Painless script used when creating the monitor.
|
||||||
`ctx.trigger.condition.script.source` | String | The language used to define the script. Must be painless.
|
`ctx.trigger.condition.script.source` | String | The language used to define the script. Must be painless.
|
||||||
`ctx.trigger.condition.script.lang` | String | The script used to define the trigger.
|
`ctx.trigger.condition.script.lang` | String | The script used to define the trigger.
|
||||||
`ctx.trigger.actions`| Array | An array with one element that contains information about the action the monitor needs to trigger.
|
`ctx.trigger.actions`| Array | An array with one element that contains information about the action the monitor needs to trigger.
|
||||||
|
@ -309,7 +337,13 @@ Variable | Data Type | Description
|
||||||
`ctx.periodStart` | String | Unix timestamp for the beginning of the period during which the alert triggered. For example, if a monitor runs every ten minutes, a period might begin at 10:40 and end at 10:50.
|
`ctx.periodStart` | String | Unix timestamp for the beginning of the period during which the alert triggered. For example, if a monitor runs every ten minutes, a period might begin at 10:40 and end at 10:50.
|
||||||
`ctx.periodEnd` | String | The end of the period during which the alert triggered.
|
`ctx.periodEnd` | String | The end of the period during which the alert triggered.
|
||||||
`ctx.error` | String | The error message if the trigger was unable to retrieve results or unable to evaluate the trigger, typically due to a compile error or null pointer exception. Null otherwise.
|
`ctx.error` | String | The error message if the trigger was unable to retrieve results or unable to evaluate the trigger, typically due to a compile error or null pointer exception. Null otherwise.
|
||||||
`ctx.alert` | JSON | The current, active alert (if it exists). Includes `ctx.alert.id`, `ctx.alert.version`, and `ctx.alert.isAcknowledged`. Null if no alert is active.
|
`ctx.alert` | Object | The current, active alert (if it exists). Includes `ctx.alert.id`, `ctx.alert.version`, and `ctx.alert.isAcknowledged`. Null if no alert is active. Only available with query-level monitors.
|
||||||
|
`ctx.dedupedAlerts` | Object | Alerts that have already been triggered. OpenSearch keeps the existing alert to prevent the plugin from creating endless amounts of the same alerts. Only available with bucket-level monitors.
|
||||||
|
`ctx.newAlerts` | Object | Newly created alerts. Only available with bucket-level monitors.
|
||||||
|
`ctx.completedAlerts` | Object | Alerts that are no longer ongoing. Only available with bucket-level monitors.
|
||||||
|
`bucket_keys` | String | Comma-separated list of the monitor's bucket key values. Available only for `ctx.dedupedAlerts`, `ctx.newAlerts`, and `ctx.completedAlerts`. Accessed through `ctx.dedupedAlerts[0].bucket_keys`.
|
||||||
|
`parent_bucket_path` | String | The parent bucket path of the bucket that triggered the alert. Accessed through `ctx.dedupedAlerts[0].parent_bucket_path`.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
@ -334,6 +368,7 @@ If you don't want to receive notifications for alerts, you don't have to add act
|
||||||
```
|
```
|
||||||
|
|
||||||
In this case, the message content must conform to the `Content-Type` header in the [custom webhook](#create-destinations).
|
In this case, the message content must conform to the `Content-Type` header in the [custom webhook](#create-destinations).
|
||||||
|
1. If you're using a bucket-level monitor, you can choose whether the monitor should perform an action for each execution or for each alert.
|
||||||
|
|
||||||
1. (Optional) Use action throttling to limit the number of notifications you receive within a given span of time.
|
1. (Optional) Use action throttling to limit the number of notifications you receive within a given span of time.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue