[Monitoring] Add Cluster Alert for Nodes Changed (elastic/x-pack-elasticsearch#3504)
This creates a cluster alert that triggers whenever a node is: - Added - Removed - Restarted (aka Removed and Added between collections) Unlike all previous cluster alerts, this cluster is always instantly resolved because there is no way to otherwise resolve it later (especially if nodes are replaced). This will require a small change to the UI so that it does not simply ignore resolved alerts. Original commit: elastic/x-pack-elasticsearch@6340bf7960
This commit is contained in:
parent
4c973c16c2
commit
e74f90eba0
|
@ -67,7 +67,8 @@ public class ClusterAlertsUtil {
|
|||
"elasticsearch_version_mismatch",
|
||||
"kibana_version_mismatch",
|
||||
"logstash_version_mismatch",
|
||||
"xpack_license_expiration"
|
||||
"xpack_license_expiration",
|
||||
"elasticsearch_nodes",
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -0,0 +1,184 @@
|
|||
{
|
||||
"metadata": {
|
||||
"name": "X-Pack Monitoring: Nodes Changed (${monitoring.watch.cluster_uuid})",
|
||||
"xpack": {
|
||||
"alert_index": ".monitoring-alerts-6",
|
||||
"cluster_uuid": "${monitoring.watch.cluster_uuid}",
|
||||
"link": "elasticsearch/nodes",
|
||||
"severity": 1999,
|
||||
"type": "monitoring",
|
||||
"version_created": 7000001,
|
||||
"watch": "${monitoring.watch.id}"
|
||||
}
|
||||
},
|
||||
"trigger": {
|
||||
"schedule": {
|
||||
"interval": "1m"
|
||||
}
|
||||
},
|
||||
"input": {
|
||||
"chain": {
|
||||
"inputs": [
|
||||
{
|
||||
"check": {
|
||||
"search": {
|
||||
"request": {
|
||||
"indices": [
|
||||
".monitoring-es-*"
|
||||
],
|
||||
"body": {
|
||||
"size": 2,
|
||||
"sort": [
|
||||
{
|
||||
"timestamp": {
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
],
|
||||
"_source": [
|
||||
"cluster_state.nodes_hash",
|
||||
"cluster_state.nodes.*.name",
|
||||
"cluster_state.nodes.*.ephemeral_id"
|
||||
],
|
||||
"collapse": {
|
||||
"field": "cluster_state.nodes_hash"
|
||||
},
|
||||
"query": {
|
||||
"bool": {
|
||||
"filter": [
|
||||
{
|
||||
"term": {
|
||||
"cluster_uuid": "${monitoring.watch.cluster_uuid}"
|
||||
}
|
||||
},
|
||||
{
|
||||
"term": {
|
||||
"type": "cluster_stats"
|
||||
}
|
||||
},
|
||||
{
|
||||
"range": {
|
||||
"timestamp": {
|
||||
"gte": "now-2d"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": {
|
||||
"search": {
|
||||
"request": {
|
||||
"indices": [
|
||||
".monitoring-alerts-6"
|
||||
],
|
||||
"body": {
|
||||
"size": 1,
|
||||
"_source": [
|
||||
"nodes.hash"
|
||||
],
|
||||
"query": {
|
||||
"bool": {
|
||||
"filter": [
|
||||
{
|
||||
"term": {
|
||||
"metadata.cluster_uuid": "${monitoring.watch.cluster_uuid}"
|
||||
}
|
||||
},
|
||||
{
|
||||
"term": {
|
||||
"metadata.watch": "${monitoring.watch.id}"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"sort": [
|
||||
{
|
||||
"timestamp": {
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"kibana_settings": {
|
||||
"search": {
|
||||
"request": {
|
||||
"indices": [
|
||||
".monitoring-kibana-*"
|
||||
],
|
||||
"body": {
|
||||
"size": 1,
|
||||
"query": {
|
||||
"bool": {
|
||||
"filter": {
|
||||
"term": {
|
||||
"type": "kibana_settings"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"sort": [
|
||||
{
|
||||
"timestamp": {
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"condition": {
|
||||
"script": {
|
||||
"source": "ctx.payload.check.hits.hits.length > 1 && (ctx.payload.alert.hits.total == 0 || ctx.payload.alert.hits.hits[0]._source.nodes.hash != ctx.payload.check.hits.hits[0]._source.cluster_state.nodes_hash)"
|
||||
}
|
||||
},
|
||||
"transform": {
|
||||
"script": {
|
||||
"source": "void formatResults(StringBuilder message, String type, Map typeMap) {if (typeMap.empty == false) {message.append(' Node');if (typeMap.size() != 1) {message.append('s were');} else {message.append(' was');}message.append(' ').append(type).append(' [').append(typeMap.size()).append(']: ').append(typeMap.values().stream().collect(Collectors.joining(', ', '[', ']'))).append('.');}}ctx.vars.email_recipient = (ctx.payload.kibana_settings.hits.total > 0) ? ctx.payload.kibana_settings.hits.hits[0]._source.kibana_settings.xpack.default_admin_email : null;def clusterState = ctx.payload.check.hits.hits[0]._source.cluster_state;def persistentUuidToName = [:];def latestNodes = clusterState.nodes;def ephemeralUuidToPersistentUuid = [:];def payload = ['timestamp': ctx.execution_time,'updated_timestamp': ctx.execution_time,'resolved_timestamp': ctx.execution_time,'metadata': ctx.metadata.xpack,'prefix': 'Elasticsearch cluster nodes have changed!','nodes': ['hash': clusterState.nodes_hash,'added': persistentUuidToName,'removed': [:],'restarted': [:]]];for (def latestNode : latestNodes.entrySet()) {persistentUuidToName[latestNode.key] = latestNode.value.name;ephemeralUuidToPersistentUuid[latestNode.value.ephemeral_id] = latestNode.key;}def previousNodes = ctx.payload.check.hits.hits[1]._source.cluster_state.nodes;def previousPersistentUuidToName = [:];for (def previousNode : previousNodes.entrySet()){if (persistentUuidToName.containsKey(previousNode.key) == false){payload.nodes.removed[previousNode.key] = previousNode.value.name;}else{if (ephemeralUuidToPersistentUuid.containsKey(previousNode.value.ephemeral_id) == false) {payload.nodes.restarted[previousNode.key] = persistentUuidToName[previousNode.key];}persistentUuidToName.remove(previousNode.key);}}StringBuilder message = new StringBuilder();formatResults(message, 'removed', payload.nodes.removed);formatResults(message, 'added', payload.nodes.added);formatResults(message, 'restarted', payload.nodes.restarted);payload.message = message.toString().trim();return payload;"
|
||||
}
|
||||
},
|
||||
"actions": {
|
||||
"add_to_alerts_index": {
|
||||
"index": {
|
||||
"index": ".monitoring-alerts-6",
|
||||
"doc_type": "doc"
|
||||
}
|
||||
},
|
||||
"send_email_to_admin": {
|
||||
"condition": {
|
||||
"script": {
|
||||
"source": "ctx.vars.email_recipient != null",
|
||||
"lang": "painless"
|
||||
}
|
||||
},
|
||||
"email": {
|
||||
"profile": "standard",
|
||||
"from": "X-Pack Admin <{{ctx.vars.email_recipient}}>",
|
||||
"to": [
|
||||
"X-Pack Admin <{{ctx.vars.email_recipient}}>"
|
||||
],
|
||||
"subject": "{{ctx.metadata.name}}",
|
||||
"body": {
|
||||
"text": "{{ctx.payload.prefix}} {{ctx.payload.message}}"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -68,7 +68,10 @@ public class ClusterAlertsUtilTests extends ESTestCase {
|
|||
assertThat(watch, notNullValue());
|
||||
assertThat(watch, containsString(clusterUuid));
|
||||
assertThat(watch, containsString(watchId));
|
||||
assertThat(watch, containsString(clusterUuid + "_" + watchId));
|
||||
|
||||
if ("elasticsearch_nodes".equals(watchId) == false) {
|
||||
assertThat(watch, containsString(clusterUuid + "_" + watchId));
|
||||
}
|
||||
|
||||
// validate that it's well formed JSON
|
||||
assertThat(XContentHelper.convertToMap(XContentType.JSON.xContent(), watch, false), notNullValue());
|
||||
|
|
|
@ -12,6 +12,7 @@ import org.elasticsearch.common.xcontent.XContentType;
|
|||
import org.elasticsearch.test.junit.annotations.TestLogging;
|
||||
import org.elasticsearch.test.rest.ESRestTestCase;
|
||||
import org.elasticsearch.test.rest.yaml.ObjectPath;
|
||||
import org.elasticsearch.xpack.monitoring.exporter.ClusterAlertsUtil;
|
||||
import org.elasticsearch.xpack.watcher.actions.ActionBuilders;
|
||||
import org.elasticsearch.xpack.watcher.client.WatchSourceBuilders;
|
||||
import org.elasticsearch.xpack.watcher.trigger.TriggerBuilders;
|
||||
|
@ -51,7 +52,7 @@ public class MonitoringWithWatcherRestIT extends ESRestTestCase {
|
|||
adminClient().performRequest("PUT", "_cluster/settings", Collections.emptyMap(),
|
||||
new StringEntity(body, ContentType.APPLICATION_JSON));
|
||||
|
||||
assertTotalWatchCount(5);
|
||||
assertTotalWatchCount(ClusterAlertsUtil.WATCH_IDS.length);
|
||||
|
||||
assertMonitoringWatchHasBeenOverWritten(watchId);
|
||||
}
|
||||
|
@ -69,7 +70,7 @@ public class MonitoringWithWatcherRestIT extends ESRestTestCase {
|
|||
adminClient().performRequest("PUT", "_cluster/settings", Collections.emptyMap(),
|
||||
new StringEntity(body, ContentType.APPLICATION_JSON));
|
||||
|
||||
assertTotalWatchCount(5);
|
||||
assertTotalWatchCount(ClusterAlertsUtil.WATCH_IDS.length);
|
||||
|
||||
assertMonitoringWatchHasBeenOverWritten(watchId);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue