Optional removal of metrics from Prometheus PushGateway on shutdown (#14935)

* Optional removal of metrics from Prometheus PushGateway on shutdown

* Make pushGatewayDeleteOnShutdown property nullable

* Add waitForShutdownDelay property

* Fix unit test

* Address PR comments

* Address PR comments

* Add explanation on why it is useful to have deletePushGatewayMetricsOnShutdown

* Fix spelling error

* Fix spelling error
This commit is contained in:
Bartosz Mikulski 2023-12-13 17:58:53 +01:00 committed by GitHub
parent 8bc7a5f3ac
commit 4670a7650f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 179 additions and 21 deletions

View File

@ -31,7 +31,6 @@ This extension exposes [Druid metrics](https://druid.apache.org/docs/latest/oper
Emitter is enabled by setting `druid.emitter=prometheus` [configs](https://druid.apache.org/docs/latest/configuration/index.html#enabling-metrics) or include `prometheus` in the composing emitter list.
## Configuration
All the configuration parameters for the Prometheus emitter are under `druid.emitter.prometheus`.
@ -47,6 +46,8 @@ All the configuration parameters for the Prometheus emitter are under `druid.emi
| `druid.emitter.prometheus.pushGatewayAddress` | Pushgateway address. Required if using `pushgateway` strategy. | no | none |
| `druid.emitter.prometheus.flushPeriod` | Emit metrics to Pushgateway every `flushPeriod` seconds. Required if `pushgateway` strategy is used. | no | 15 |
| `druid.emitter.prometheus.extraLabels` | JSON key-value pairs for additional labels on all metrics. Keys (label names) must match the regex `[a-zA-Z_:][a-zA-Z0-9_:]*`. Example: `{"cluster_name": "druid_cluster1", "env": "staging"}`. | no | none |
| `druid.emitter.prometheus.deletePushGatewayMetricsOnShutdown` | Flag to delete metrics from Pushgateway on task shutdown. Works only if `pushgateway` strategy is used. This feature allows to delete a stale metrics from batch executed tasks. Otherwise, the Pushgateway will store these stale metrics indefinitely as there is [no time to live mechanism](https://github.com/prometheus/pushgateway/issues/117), using the memory to hold data that was already scraped by Prometheus. | no | false |
| `druid.emitter.prometheus.waitForShutdownDelay` | Time in milliseconds to wait for peon tasks to delete metrics from the Pushgateway on shutdown (e.g. 60_000). Applicable only when `pushgateway` strategy is used and `pushGatewayDeleteOnShutdown` is set to true. Be aware that there's no guarantee that a peon task will delete metrics from the gateway if the configured delay is more than the [Peon's `druid.indexer.task.gracefulShutdownTimeout`](https://druid.apache.org/docs/latest/configuration/#additional-peon-configuration). It is recommended that this value is 1.2 times the configured Prometheus `scrape_interval` of Pushgateway. This ensures, that the metrics should be scraped before the cleanup. | no | none |
### Ports for colocated Druid processes

View File

@ -69,7 +69,13 @@ public class PrometheusEmitter implements Emitter
{
this.config = config;
this.strategy = config.getStrategy();
metrics = new Metrics(config.getNamespace(), config.getDimensionMapPath(), config.isAddHostAsLabel(), config.isAddServiceAsLabel(), config.getExtraLabels());
metrics = new Metrics(
config.getNamespace(),
config.getDimensionMapPath(),
config.isAddHostAsLabel(),
config.isAddServiceAsLabel(),
config.getExtraLabels()
);
}
@ -164,7 +170,8 @@ public class PrometheusEmitter implements Emitter
} else if (metric.getCollector() instanceof Gauge) {
((Gauge) metric.getCollector()).labels(labelValues).set(value.doubleValue());
} else if (metric.getCollector() instanceof Histogram) {
((Histogram) metric.getCollector()).labels(labelValues).observe(value.doubleValue() / metric.getConversionFactor());
((Histogram) metric.getCollector()).labels(labelValues)
.observe(value.doubleValue() / metric.getConversionFactor());
} else {
log.error("Unrecognized metric type [%s]", metric.getCollector().getClass());
}
@ -202,11 +209,36 @@ public class PrometheusEmitter implements Emitter
{
if (strategy.equals(PrometheusEmitterConfig.Strategy.exporter)) {
if (server != null) {
server.stop();
server.close();
}
} else {
exec.shutdownNow();
flush();
try {
if (config.getWaitForShutdownDelay().getMillis() > 0) {
log.info("Waiting [%s]ms before deleting metrics from the push gateway.", config.getWaitForShutdownDelay().getMillis());
Thread.sleep(config.getWaitForShutdownDelay().getMillis());
}
}
catch (InterruptedException e) {
log.error(e, "Interrupted while waiting for shutdown delay. Deleting metrics from the push gateway now.");
}
finally {
deletePushGatewayMetrics();
}
}
}
private void deletePushGatewayMetrics()
{
if (pushGateway != null && config.isDeletePushGatewayMetricsOnShutdown()) {
try {
pushGateway.delete(config.getNamespace(), ImmutableMap.of(config.getNamespace(), identifier));
}
catch (IOException e) {
log.error(e, "Unable to delete prometheus metrics from push gateway");
}
}
}

View File

@ -24,6 +24,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import org.apache.druid.error.DruidException;
import org.apache.druid.java.util.common.StringUtils;
import org.joda.time.Duration;
import javax.annotation.Nullable;
import java.util.Collections;
@ -70,6 +71,12 @@ public class PrometheusEmitterConfig
@JsonProperty
private final Map<String, String> extraLabels;
@JsonProperty
private final boolean deletePushGatewayMetricsOnShutdown;
@JsonProperty
private final Duration waitForShutdownDelay;
@JsonCreator
public PrometheusEmitterConfig(
@JsonProperty("strategy") @Nullable Strategy strategy,
@ -80,7 +87,9 @@ public class PrometheusEmitterConfig
@JsonProperty("addHostAsLabel") boolean addHostAsLabel,
@JsonProperty("addServiceAsLabel") boolean addServiceAsLabel,
@JsonProperty("flushPeriod") Integer flushPeriod,
@JsonProperty("extraLabels") @Nullable Map<String, String> extraLabels
@JsonProperty("extraLabels") @Nullable Map<String, String> extraLabels,
@JsonProperty("deletePushGatewayMetricsOnShutdown") @Nullable Boolean deletePushGatewayMetricsOnShutdown,
@JsonProperty("waitForShutdownDelay") @Nullable Long waitForShutdownDelay
)
{
this.strategy = strategy != null ? strategy : Strategy.exporter;
@ -103,6 +112,23 @@ public class PrometheusEmitterConfig
this.addHostAsLabel = addHostAsLabel;
this.addServiceAsLabel = addServiceAsLabel;
this.extraLabels = extraLabels != null ? extraLabels : Collections.emptyMap();
this.deletePushGatewayMetricsOnShutdown = deletePushGatewayMetricsOnShutdown != null && deletePushGatewayMetricsOnShutdown;
if (waitForShutdownDelay == null) {
this.waitForShutdownDelay = Duration.ZERO;
} else if (waitForShutdownDelay >= 0) {
this.waitForShutdownDelay = Duration.millis(waitForShutdownDelay);
} else {
throw DruidException.forPersona(DruidException.Persona.OPERATOR)
.ofCategory(DruidException.Category.INVALID_INPUT)
.build(
StringUtils.format(
"Invalid value for waitForShutdownDelay[%s] specified, waitForShutdownDelay must be >= 0.",
waitForShutdownDelay
)
);
}
// Validate label names early to prevent Prometheus exceptions later.
for (String key : this.extraLabels.keySet()) {
if (!PATTERN.matcher(key).matches()) {
@ -165,6 +191,16 @@ public class PrometheusEmitterConfig
return extraLabels;
}
public boolean isDeletePushGatewayMetricsOnShutdown()
{
return deletePushGatewayMetricsOnShutdown;
}
public Duration getWaitForShutdownDelay()
{
return waitForShutdownDelay;
}
public enum Strategy
{
exporter, pushgateway

View File

@ -39,7 +39,7 @@ public class PrometheusEmitterConfigTest
// Expect an exception thrown by our own PrometheusEmitterConfig due to invalid label key
Exception exception = Assert.assertThrows(DruidException.class, () -> {
new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, false, true, 60, extraLabels);
new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, false, true, 60, extraLabels, false, null);
});
String expectedMessage = "Invalid metric label name [label Name]. Label names must conform to the pattern [[a-zA-Z_:][a-zA-Z0-9_:]*]";

View File

@ -25,6 +25,7 @@ import io.prometheus.client.CollectorRegistry;
import io.prometheus.client.exporter.PushGateway;
import org.apache.druid.java.util.emitter.core.Emitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.easymock.EasyMock;
import org.junit.Assert;
import org.junit.Test;
@ -34,6 +35,7 @@ import java.util.Map;
import static org.easymock.EasyMock.anyObject;
import static org.easymock.EasyMock.anyString;
import static org.easymock.EasyMock.expectLastCall;
import static org.easymock.EasyMock.mock;
public class PrometheusEmitterTest
@ -42,7 +44,7 @@ public class PrometheusEmitterTest
public void testEmitterWithServiceLabel()
{
CollectorRegistry.defaultRegistry.clear();
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, false, true, 60, null);
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, false, true, 60, null, false, null);
PrometheusEmitterModule prometheusEmitterModule = new PrometheusEmitterModule();
Emitter emitter = prometheusEmitterModule.getEmitter(config);
ServiceMetricEvent build = ServiceMetricEvent.builder()
@ -63,7 +65,7 @@ public class PrometheusEmitterTest
public void testEmitterWithServiceAndHostLabel()
{
CollectorRegistry.defaultRegistry.clear();
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, true, true, 60, null);
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, true, true, 60, null, false, null);
PrometheusEmitterModule prometheusEmitterModule = new PrometheusEmitterModule();
Emitter emitter = prometheusEmitterModule.getEmitter(config);
ServiceMetricEvent build = ServiceMetricEvent.builder()
@ -86,7 +88,7 @@ public class PrometheusEmitterTest
CollectorRegistry.defaultRegistry.clear();
Map<String, String> extraLabels = new HashMap<>();
extraLabels.put("labelName", "labelValue");
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, false, false, 60, extraLabels);
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, false, false, 60, extraLabels, false, null);
PrometheusEmitterModule prometheusEmitterModule = new PrometheusEmitterModule();
Emitter emitter = prometheusEmitterModule.getEmitter(config);
ServiceMetricEvent build = ServiceMetricEvent.builder()
@ -108,7 +110,7 @@ public class PrometheusEmitterTest
CollectorRegistry.defaultRegistry.clear();
Map<String, String> extraLabels = new HashMap<>();
extraLabels.put("labelName", "labelValue");
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, false, true, 60, extraLabels);
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, false, true, 60, extraLabels, false, null);
PrometheusEmitterModule prometheusEmitterModule = new PrometheusEmitterModule();
Emitter emitter = prometheusEmitterModule.getEmitter(config);
ServiceMetricEvent build = ServiceMetricEvent.builder()
@ -129,7 +131,7 @@ public class PrometheusEmitterTest
{
CollectorRegistry.defaultRegistry.clear();
Map<String, String> extraLabels = new HashMap<>();
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, false, true, 60, extraLabels);
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, false, true, 60, extraLabels, false, null);
PrometheusEmitterModule prometheusEmitterModule = new PrometheusEmitterModule();
Emitter emitter = prometheusEmitterModule.getEmitter(config);
ServiceMetricEvent build = ServiceMetricEvent.builder()
@ -152,7 +154,7 @@ public class PrometheusEmitterTest
Map<String, String> extraLabels = new HashMap<>();
extraLabels.put("labelName1", "labelValue1");
extraLabels.put("labelName2", "labelValue2");
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, false, true, 60, extraLabels);
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, false, true, 60, extraLabels, false, null);
PrometheusEmitterModule prometheusEmitterModule = new PrometheusEmitterModule();
Emitter emitter = prometheusEmitterModule.getEmitter(config);
ServiceMetricEvent build = ServiceMetricEvent.builder()
@ -175,7 +177,7 @@ public class PrometheusEmitterTest
// ExtraLabels contains a label that collides with a service label
Map<String, String> extraLabels = new HashMap<>();
extraLabels.put("server", "collisionLabelValue");
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, false, true, 60, extraLabels);
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, null, null, 0, null, false, true, 60, extraLabels, false, null);
PrometheusEmitterModule prometheusEmitterModule = new PrometheusEmitterModule();
Emitter emitter = prometheusEmitterModule.getEmitter(config);
ServiceMetricEvent build = ServiceMetricEvent.builder()
@ -202,7 +204,7 @@ public class PrometheusEmitterTest
public void testEmitterMetric()
{
CollectorRegistry.defaultRegistry.clear();
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.pushgateway, "namespace", null, 0, "pushgateway", true, true, 60, null);
PrometheusEmitterConfig config = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.pushgateway, "namespace", null, 0, "pushgateway", true, true, 60, null, false, null);
PrometheusEmitterModule prometheusEmitterModule = new PrometheusEmitterModule();
Emitter emitter = prometheusEmitterModule.getEmitter(config);
ServiceMetricEvent build = ServiceMetricEvent.builder()
@ -223,12 +225,12 @@ public class PrometheusEmitterTest
@Test
public void testEmitterStart()
{
PrometheusEmitterConfig exportEmitterConfig = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, "namespace1", null, 0, null, true, true, 60, null);
PrometheusEmitterConfig exportEmitterConfig = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.exporter, "namespace1", null, 0, null, true, true, 60, null, false, null);
PrometheusEmitter exportEmitter = new PrometheusEmitter(exportEmitterConfig);
exportEmitter.start();
Assert.assertNotNull(exportEmitter.getServer());
PrometheusEmitterConfig pushEmitterConfig = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.pushgateway, "namespace2", null, 0, "pushgateway", true, true, 60, null);
PrometheusEmitterConfig pushEmitterConfig = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.pushgateway, "namespace2", null, 0, "pushgateway", true, true, 60, null, false, null);
PrometheusEmitter pushEmitter = new PrometheusEmitter(pushEmitterConfig);
pushEmitter.start();
Assert.assertNotNull(pushEmitter.getPushGateway());
@ -237,7 +239,7 @@ public class PrometheusEmitterTest
@Test
public void testEmitterPush() throws IOException
{
PrometheusEmitterConfig emitterConfig = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.pushgateway, "namespace3", null, 0, "pushgateway", true, true, 60, null);
PrometheusEmitterConfig emitterConfig = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.pushgateway, "namespace3", null, 0, "pushgateway", true, true, 60, null, false, null);
PushGateway mockPushGateway = mock(PushGateway.class);
mockPushGateway.push(anyObject(Collector.class), anyString(), anyObject(ImmutableMap.class));
@ -266,6 +268,8 @@ public class PrometheusEmitterTest
true,
true,
60,
null,
false,
null
);
@ -281,6 +285,8 @@ public class PrometheusEmitterTest
true,
true,
50,
null,
false,
null
)
);
@ -289,7 +295,7 @@ public class PrometheusEmitterTest
@Test
public void testEmitterStartWithHttpUrl()
{
PrometheusEmitterConfig pushEmitterConfig = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.pushgateway, "namespace4", null, 0, "http://pushgateway", true, true, 60, null);
PrometheusEmitterConfig pushEmitterConfig = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.pushgateway, "namespace4", null, 0, "http://pushgateway", true, true, 60, null, false, null);
PrometheusEmitter pushEmitter = new PrometheusEmitter(pushEmitterConfig);
pushEmitter.start();
Assert.assertNotNull(pushEmitter.getPushGateway());
@ -298,7 +304,7 @@ public class PrometheusEmitterTest
@Test
public void testEmitterStartWithHttpsUrl()
{
PrometheusEmitterConfig pushEmitterConfig = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.pushgateway, "namespace5", null, 0, "https://pushgateway", true, true, 60, null);
PrometheusEmitterConfig pushEmitterConfig = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.pushgateway, "namespace5", null, 0, "https://pushgateway", true, true, 60, null, false, null);
PrometheusEmitter pushEmitter = new PrometheusEmitter(pushEmitterConfig);
pushEmitter.start();
Assert.assertNotNull(pushEmitter.getPushGateway());
@ -319,6 +325,8 @@ public class PrometheusEmitterTest
true,
true,
60,
null,
false,
null
)
);
@ -333,7 +341,88 @@ public class PrometheusEmitterTest
true,
true,
60,
null,
false,
null
);
}
@Test
public void testEmitterWithDeleteOnShutdown() throws IOException
{
PrometheusEmitterConfig emitterConfig = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.pushgateway, "namespace3", null, 0, "pushgateway", true, true, 60, null, true, null);
PushGateway mockPushGateway = mock(PushGateway.class);
mockPushGateway.push(anyObject(CollectorRegistry.class), anyString(), anyObject(ImmutableMap.class));
expectLastCall().atLeastOnce();
mockPushGateway.delete(anyString(), anyObject(ImmutableMap.class));
expectLastCall().once();
EasyMock.replay(mockPushGateway);
PrometheusEmitter emitter = new PrometheusEmitter(emitterConfig);
emitter.start();
emitter.setPushGateway(mockPushGateway);
ServiceMetricEvent build = ServiceMetricEvent.builder()
.setDimension("task", "index_parallel")
.setMetric("task/run/time", 500)
.build(ImmutableMap.of("service", "peon", "host", "druid.test.cn"));
emitter.emit(build);
emitter.flush();
emitter.close();
EasyMock.verify(mockPushGateway);
}
@Test
public void testEmitterWithDeleteOnShutdownAndWait() throws IOException
{
PrometheusEmitterConfig emitterConfig = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.pushgateway, "namespace6", null, 0, "pushgateway", true, true, 60, null, true, 1_000L);
PushGateway mockPushGateway = mock(PushGateway.class);
mockPushGateway.push(anyObject(CollectorRegistry.class), anyString(), anyObject(ImmutableMap.class));
expectLastCall().atLeastOnce();
mockPushGateway.delete(anyString(), anyObject(ImmutableMap.class));
expectLastCall().once();
EasyMock.replay(mockPushGateway);
PrometheusEmitter emitter = new PrometheusEmitter(emitterConfig);
emitter.start();
emitter.setPushGateway(mockPushGateway);
ServiceMetricEvent build = ServiceMetricEvent.builder()
.setDimension("task", "index_parallel")
.setMetric("task/run/time", 500)
.build(ImmutableMap.of("service", "peon", "host", "druid.test.cn"));
emitter.emit(build);
emitter.flush();
emitter.close();
EasyMock.verify(mockPushGateway);
}
@Test
public void testEmitterWithoutDeleteOnShutdown() throws IOException
{
PrometheusEmitterConfig emitterConfig = new PrometheusEmitterConfig(PrometheusEmitterConfig.Strategy.pushgateway, "namespace3", null, 0, "pushgateway", true, true, 60, null, false, null);
PushGateway mockPushGateway = mock(PushGateway.class);
mockPushGateway.push(anyObject(CollectorRegistry.class), anyString(), anyObject(ImmutableMap.class));
expectLastCall().atLeastOnce();
EasyMock.replay(mockPushGateway);
PrometheusEmitter emitter = new PrometheusEmitter(emitterConfig);
emitter.start();
emitter.setPushGateway(mockPushGateway);
ServiceMetricEvent build = ServiceMetricEvent.builder()
.setDimension("task", "index_parallel")
.setMetric("task/run/time", 500)
.build(ImmutableMap.of("service", "peon", "host", "druid.test.cn"));
emitter.emit(build);
emitter.flush();
emitter.close();
EasyMock.verify(mockPushGateway);
}
}