Additional dimensions for service/heartbeat (#14743)

* Additional dimensions for service/heartbeat

* docs

* review

* review
This commit is contained in:
Suneet Saldanha 2023-08-04 11:01:07 -07:00 committed by GitHub
parent 590734b5eb
commit 62ddeaf16f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 57 additions and 10 deletions

View File

@ -338,7 +338,7 @@ These metrics are for the Druid Coordinator and are reset each time the Coordina
|Metric|Description|Dimensions|Normal value| |Metric|Description|Dimensions|Normal value|
|------|-----------|----------|------------| |------|-----------|----------|------------|
| `service/heartbeat` | Metric indicating the service is up. `ServiceStatusMonitor` must be enabled. |`leader` on the Overlord and Coordinator.|1| | `service/heartbeat` | Metric indicating the service is up. `ServiceStatusMonitor` must be enabled. | `leader` on the Overlord and Coordinator.<br />`workerVersion`, `category`, `status` on the Middle Manager.<br />`taskId`, `groupId`, `taskType`, `dataSource` on the Peon |1|
### Historical ### Historical

View File

@ -175,5 +175,5 @@
"namespace/cache/numEntries" : { "dimensions" : [], "type" : "gauge" }, "namespace/cache/numEntries" : { "dimensions" : [], "type" : "gauge" },
"namespace/cache/heapSizeInBytes" : { "dimensions" : [], "type" : "gauge" }, "namespace/cache/heapSizeInBytes" : { "dimensions" : [], "type" : "gauge" },
"service/heartbeat" : { "dimensions" : ["leader"], "type" : "gauge" } "service/heartbeat" : { "dimensions" : ["leader"], "type" : "count" }
} }

View File

@ -49,6 +49,9 @@ public class DruidMetrics
public static final String TAGS = "tags"; public static final String TAGS = "tags";
public static final String CATEGORY = "category";
public static final String WORKER_VERSION = "workerVersion";
public static int findNumComplexAggs(List<AggregatorFactory> aggs) public static int findNumComplexAggs(List<AggregatorFactory> aggs)
{ {
int retVal = 0; int retVal = 0;

View File

@ -33,8 +33,12 @@ import java.util.Map;
*/ */
public class ServiceStatusMonitor extends AbstractMonitor public class ServiceStatusMonitor extends AbstractMonitor
{ {
/**
* The named binding for tags that should be reported with the `service/heartbeat` metric.
*/
public static final String HEARTBEAT_TAGS_BINDING = "heartbeat";
@Named("heartbeat") @Named(HEARTBEAT_TAGS_BINDING)
@Inject(optional = true) @Inject(optional = true)
Supplier<Map<String, Object>> heartbeatTagsSupplier = null; Supplier<Map<String, Object>> heartbeatTagsSupplier = null;
@ -43,9 +47,7 @@ public class ServiceStatusMonitor extends AbstractMonitor
{ {
final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder(); final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder();
if (heartbeatTagsSupplier != null && heartbeatTagsSupplier.get() != null) { if (heartbeatTagsSupplier != null && heartbeatTagsSupplier.get() != null) {
heartbeatTagsSupplier.get().forEach((k, v) -> { heartbeatTagsSupplier.get().forEach(builder::setDimension);
builder.setDimension(k, v);
});
} }
emitter.emit(builder.build("service/heartbeat", 1)); emitter.emit(builder.build("service/heartbeat", 1));

View File

@ -26,6 +26,7 @@ import org.apache.druid.guice.annotations.Self;
import org.apache.druid.java.util.emitter.service.ServiceEmitter; import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent; import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.AbstractMonitor; import org.apache.druid.java.util.metrics.AbstractMonitor;
import org.apache.druid.query.DruidMetrics;
import java.util.Set; import java.util.Set;
@ -71,8 +72,8 @@ public class WorkerTaskCountStatsMonitor extends AbstractMonitor
{ {
if (value != null) { if (value != null) {
final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder(); final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder();
builder.setDimension("category", workerCategory); builder.setDimension(DruidMetrics.CATEGORY, workerCategory);
builder.setDimension("workerVersion", workerVersion); builder.setDimension(DruidMetrics.WORKER_VERSION, workerVersion);
emitter.emit(builder.build(metricName, value)); emitter.emit(builder.build(metricName, value));
} }
} }

View File

@ -114,6 +114,7 @@ import org.apache.druid.server.initialization.ZkPathsConfig;
import org.apache.druid.server.initialization.jetty.JettyServerInitializer; import org.apache.druid.server.initialization.jetty.JettyServerInitializer;
import org.apache.druid.server.lookup.cache.LookupCoordinatorManager; import org.apache.druid.server.lookup.cache.LookupCoordinatorManager;
import org.apache.druid.server.lookup.cache.LookupCoordinatorManagerConfig; import org.apache.druid.server.lookup.cache.LookupCoordinatorManagerConfig;
import org.apache.druid.server.metrics.ServiceStatusMonitor;
import org.apache.druid.server.router.TieredBrokerConfig; import org.apache.druid.server.router.TieredBrokerConfig;
import org.eclipse.jetty.server.Server; import org.eclipse.jetty.server.Server;
import org.joda.time.Duration; import org.joda.time.Duration;
@ -332,7 +333,7 @@ public class CliCoordinator extends ServerRunnable
binder.bind(RowIngestionMetersFactory.class).toProvider(Providers.of(null)); binder.bind(RowIngestionMetersFactory.class).toProvider(Providers.of(null));
// Bind HeartbeatSupplier only when the service operates independently of Overlord. // Bind HeartbeatSupplier only when the service operates independently of Overlord.
binder.bind(new TypeLiteral<Supplier<Map<String, Object>>>() {}) binder.bind(new TypeLiteral<Supplier<Map<String, Object>>>() {})
.annotatedWith(Names.named("heartbeat")) .annotatedWith(Names.named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING))
.toProvider(HeartbeatSupplier.class); .toProvider(HeartbeatSupplier.class);
} }

View File

@ -20,7 +20,9 @@
package org.apache.druid.cli; package org.apache.druid.cli;
import com.github.rvesse.airline.annotations.Command; import com.github.rvesse.airline.annotations.Command;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet;
import com.google.inject.Binder; import com.google.inject.Binder;
import com.google.inject.Inject; import com.google.inject.Inject;
@ -28,6 +30,7 @@ import com.google.inject.Key;
import com.google.inject.Module; import com.google.inject.Module;
import com.google.inject.Provides; import com.google.inject.Provides;
import com.google.inject.multibindings.MapBinder; import com.google.inject.multibindings.MapBinder;
import com.google.inject.name.Named;
import com.google.inject.name.Names; import com.google.inject.name.Names;
import com.google.inject.util.Providers; import com.google.inject.util.Providers;
import org.apache.druid.curator.ZkEnablementConfig; import org.apache.druid.curator.ZkEnablementConfig;
@ -67,6 +70,7 @@ import org.apache.druid.indexing.worker.shuffle.LocalIntermediaryDataManager;
import org.apache.druid.indexing.worker.shuffle.ShuffleModule; import org.apache.druid.indexing.worker.shuffle.ShuffleModule;
import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.metadata.input.InputSourceModule; import org.apache.druid.metadata.input.InputSourceModule;
import org.apache.druid.query.DruidMetrics;
import org.apache.druid.query.lookup.LookupSerdeModule; import org.apache.druid.query.lookup.LookupSerdeModule;
import org.apache.druid.segment.incremental.RowIngestionMetersFactory; import org.apache.druid.segment.incremental.RowIngestionMetersFactory;
import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager; import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager;
@ -76,11 +80,13 @@ import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider;
import org.apache.druid.server.DruidNode; import org.apache.druid.server.DruidNode;
import org.apache.druid.server.http.SelfDiscoveryResource; import org.apache.druid.server.http.SelfDiscoveryResource;
import org.apache.druid.server.initialization.jetty.JettyServerInitializer; import org.apache.druid.server.initialization.jetty.JettyServerInitializer;
import org.apache.druid.server.metrics.ServiceStatusMonitor;
import org.apache.druid.server.metrics.WorkerTaskCountStatsProvider; import org.apache.druid.server.metrics.WorkerTaskCountStatsProvider;
import org.apache.druid.timeline.PruneLastCompactionState; import org.apache.druid.timeline.PruneLastCompactionState;
import org.eclipse.jetty.server.Server; import org.eclipse.jetty.server.Server;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Properties; import java.util.Properties;
import java.util.Set; import java.util.Set;
@ -195,6 +201,18 @@ public class CliMiddleManager extends ServerRunnable
biddy.addBinding("deepstore").to(DeepStorageIntermediaryDataManager.class).in(LazySingleton.class); biddy.addBinding("deepstore").to(DeepStorageIntermediaryDataManager.class).in(LazySingleton.class);
} }
@Provides
@LazySingleton
@Named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING)
public Supplier<Map<String, Object>> heartbeatDimensions(WorkerConfig workerConfig, WorkerTaskManager workerTaskManager)
{
return () -> ImmutableMap.of(
DruidMetrics.WORKER_VERSION, workerConfig.getVersion(),
DruidMetrics.CATEGORY, workerConfig.getCategory(),
DruidMetrics.STATUS, workerTaskManager.isWorkerEnabled() ? "Enabled" : "Disabled"
);
}
@Provides @Provides
@LazySingleton @LazySingleton
public Worker getWorker(@Self DruidNode node, WorkerConfig config) public Worker getWorker(@Self DruidNode node, WorkerConfig config)

View File

@ -118,6 +118,7 @@ import org.apache.druid.server.http.SelfDiscoveryResource;
import org.apache.druid.server.initialization.ServerConfig; import org.apache.druid.server.initialization.ServerConfig;
import org.apache.druid.server.initialization.jetty.JettyServerInitUtils; import org.apache.druid.server.initialization.jetty.JettyServerInitUtils;
import org.apache.druid.server.initialization.jetty.JettyServerInitializer; import org.apache.druid.server.initialization.jetty.JettyServerInitializer;
import org.apache.druid.server.metrics.ServiceStatusMonitor;
import org.apache.druid.server.metrics.TaskCountStatsProvider; import org.apache.druid.server.metrics.TaskCountStatsProvider;
import org.apache.druid.server.metrics.TaskSlotCountStatsProvider; import org.apache.druid.server.metrics.TaskSlotCountStatsProvider;
import org.apache.druid.server.security.AuthConfig; import org.apache.druid.server.security.AuthConfig;
@ -361,7 +362,7 @@ public class CliOverlord extends ServerRunnable
@Provides @Provides
@LazySingleton @LazySingleton
@Named("heartbeat") @Named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING)
public Supplier<Map<String, Object>> getHeartbeatSupplier(TaskMaster taskMaster) public Supplier<Map<String, Object>> getHeartbeatSupplier(TaskMaster taskMaster)
{ {
return () -> { return () -> {

View File

@ -24,8 +24,11 @@ import com.github.rvesse.airline.annotations.Arguments;
import com.github.rvesse.airline.annotations.Command; import com.github.rvesse.airline.annotations.Command;
import com.github.rvesse.airline.annotations.Option; import com.github.rvesse.airline.annotations.Option;
import com.github.rvesse.airline.annotations.restrictions.Required; import com.github.rvesse.airline.annotations.restrictions.Required;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.base.Throwables; import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet;
import com.google.inject.Binder; import com.google.inject.Binder;
import com.google.inject.Inject; import com.google.inject.Inject;
@ -96,6 +99,7 @@ import org.apache.druid.java.util.common.lifecycle.Lifecycle;
import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator;
import org.apache.druid.metadata.input.InputSourceModule; import org.apache.druid.metadata.input.InputSourceModule;
import org.apache.druid.query.DruidMetrics;
import org.apache.druid.query.QuerySegmentWalker; import org.apache.druid.query.QuerySegmentWalker;
import org.apache.druid.query.lookup.LookupModule; import org.apache.druid.query.lookup.LookupModule;
import org.apache.druid.segment.handoff.CoordinatorBasedSegmentHandoffNotifierConfig; import org.apache.druid.segment.handoff.CoordinatorBasedSegmentHandoffNotifierConfig;
@ -124,12 +128,14 @@ import org.apache.druid.server.http.SegmentListerResource;
import org.apache.druid.server.initialization.jetty.ChatHandlerServerModule; import org.apache.druid.server.initialization.jetty.ChatHandlerServerModule;
import org.apache.druid.server.initialization.jetty.JettyServerInitializer; import org.apache.druid.server.initialization.jetty.JettyServerInitializer;
import org.apache.druid.server.metrics.DataSourceTaskIdHolder; import org.apache.druid.server.metrics.DataSourceTaskIdHolder;
import org.apache.druid.server.metrics.ServiceStatusMonitor;
import org.eclipse.jetty.server.Server; import org.eclipse.jetty.server.Server;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Properties; import java.util.Properties;
import java.util.Set; import java.util.Set;
@ -259,6 +265,21 @@ public class CliPeon extends GuiceRunnable
} }
} }
@Provides
@LazySingleton
@Named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING)
public Supplier<Map<String, Object>> heartbeatDimensions(Task task)
{
return Suppliers.ofInstance(
ImmutableMap.of(
DruidMetrics.TASK_ID, task.getId(),
DruidMetrics.DATASOURCE, task.getDataSource(),
DruidMetrics.TASK_TYPE, task.getType(),
DruidMetrics.GROUP_ID, task.getGroupId()
)
);
}
@Provides @Provides
@LazySingleton @LazySingleton
public Task readTask(@Json ObjectMapper mapper, ExecutorLifecycleConfig config) public Task readTask(@Json ObjectMapper mapper, ExecutorLifecycleConfig config)