Additional dimensions for service/heartbeat (#14743)

* Additional dimensions for service/heartbeat

* docs

* review

* review
This commit is contained in:
Suneet Saldanha 2023-08-04 11:01:07 -07:00 committed by GitHub
parent 590734b5eb
commit 62ddeaf16f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 57 additions and 10 deletions

View File

@ -338,7 +338,7 @@ These metrics are for the Druid Coordinator and are reset each time the Coordina
|Metric|Description|Dimensions|Normal value|
|------|-----------|----------|------------|
| `service/heartbeat` | Metric indicating the service is up. `ServiceStatusMonitor` must be enabled. |`leader` on the Overlord and Coordinator.|1|
| `service/heartbeat` | Metric indicating the service is up. `ServiceStatusMonitor` must be enabled. | `leader` on the Overlord and Coordinator.<br />`workerVersion`, `category`, `status` on the Middle Manager.<br />`taskId`, `groupId`, `taskType`, `dataSource` on the Peon |1|
### Historical

View File

@ -175,5 +175,5 @@
"namespace/cache/numEntries" : { "dimensions" : [], "type" : "gauge" },
"namespace/cache/heapSizeInBytes" : { "dimensions" : [], "type" : "gauge" },
"service/heartbeat" : { "dimensions" : ["leader"], "type" : "gauge" }
"service/heartbeat" : { "dimensions" : ["leader"], "type" : "count" }
}

View File

@ -49,6 +49,9 @@ public class DruidMetrics
public static final String TAGS = "tags";
public static final String CATEGORY = "category";
public static final String WORKER_VERSION = "workerVersion";
public static int findNumComplexAggs(List<AggregatorFactory> aggs)
{
int retVal = 0;

View File

@ -33,8 +33,12 @@ import java.util.Map;
*/
public class ServiceStatusMonitor extends AbstractMonitor
{
/**
* The named binding for tags that should be reported with the `service/heartbeat` metric.
*/
public static final String HEARTBEAT_TAGS_BINDING = "heartbeat";
@Named("heartbeat")
@Named(HEARTBEAT_TAGS_BINDING)
@Inject(optional = true)
Supplier<Map<String, Object>> heartbeatTagsSupplier = null;
@ -43,9 +47,7 @@ public class ServiceStatusMonitor extends AbstractMonitor
{
final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder();
if (heartbeatTagsSupplier != null && heartbeatTagsSupplier.get() != null) {
heartbeatTagsSupplier.get().forEach((k, v) -> {
builder.setDimension(k, v);
});
heartbeatTagsSupplier.get().forEach(builder::setDimension);
}
emitter.emit(builder.build("service/heartbeat", 1));

View File

@ -26,6 +26,7 @@ import org.apache.druid.guice.annotations.Self;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.AbstractMonitor;
import org.apache.druid.query.DruidMetrics;
import java.util.Set;
@ -71,8 +72,8 @@ public class WorkerTaskCountStatsMonitor extends AbstractMonitor
{
if (value != null) {
final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder();
builder.setDimension("category", workerCategory);
builder.setDimension("workerVersion", workerVersion);
builder.setDimension(DruidMetrics.CATEGORY, workerCategory);
builder.setDimension(DruidMetrics.WORKER_VERSION, workerVersion);
emitter.emit(builder.build(metricName, value));
}
}

View File

@ -114,6 +114,7 @@ import org.apache.druid.server.initialization.ZkPathsConfig;
import org.apache.druid.server.initialization.jetty.JettyServerInitializer;
import org.apache.druid.server.lookup.cache.LookupCoordinatorManager;
import org.apache.druid.server.lookup.cache.LookupCoordinatorManagerConfig;
import org.apache.druid.server.metrics.ServiceStatusMonitor;
import org.apache.druid.server.router.TieredBrokerConfig;
import org.eclipse.jetty.server.Server;
import org.joda.time.Duration;
@ -332,7 +333,7 @@ public class CliCoordinator extends ServerRunnable
binder.bind(RowIngestionMetersFactory.class).toProvider(Providers.of(null));
// Bind HeartbeatSupplier only when the service operates independently of Overlord.
binder.bind(new TypeLiteral<Supplier<Map<String, Object>>>() {})
.annotatedWith(Names.named("heartbeat"))
.annotatedWith(Names.named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING))
.toProvider(HeartbeatSupplier.class);
}

View File

@ -20,7 +20,9 @@
package org.apache.druid.cli;
import com.github.rvesse.airline.annotations.Command;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.inject.Binder;
import com.google.inject.Inject;
@ -28,6 +30,7 @@ import com.google.inject.Key;
import com.google.inject.Module;
import com.google.inject.Provides;
import com.google.inject.multibindings.MapBinder;
import com.google.inject.name.Named;
import com.google.inject.name.Names;
import com.google.inject.util.Providers;
import org.apache.druid.curator.ZkEnablementConfig;
@ -67,6 +70,7 @@ import org.apache.druid.indexing.worker.shuffle.LocalIntermediaryDataManager;
import org.apache.druid.indexing.worker.shuffle.ShuffleModule;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.metadata.input.InputSourceModule;
import org.apache.druid.query.DruidMetrics;
import org.apache.druid.query.lookup.LookupSerdeModule;
import org.apache.druid.segment.incremental.RowIngestionMetersFactory;
import org.apache.druid.segment.realtime.appenderator.AppenderatorsManager;
@ -76,11 +80,13 @@ import org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider;
import org.apache.druid.server.DruidNode;
import org.apache.druid.server.http.SelfDiscoveryResource;
import org.apache.druid.server.initialization.jetty.JettyServerInitializer;
import org.apache.druid.server.metrics.ServiceStatusMonitor;
import org.apache.druid.server.metrics.WorkerTaskCountStatsProvider;
import org.apache.druid.timeline.PruneLastCompactionState;
import org.eclipse.jetty.server.Server;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
@ -195,6 +201,18 @@ public class CliMiddleManager extends ServerRunnable
biddy.addBinding("deepstore").to(DeepStorageIntermediaryDataManager.class).in(LazySingleton.class);
}
@Provides
@LazySingleton
@Named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING)
public Supplier<Map<String, Object>> heartbeatDimensions(WorkerConfig workerConfig, WorkerTaskManager workerTaskManager)
{
return () -> ImmutableMap.of(
DruidMetrics.WORKER_VERSION, workerConfig.getVersion(),
DruidMetrics.CATEGORY, workerConfig.getCategory(),
DruidMetrics.STATUS, workerTaskManager.isWorkerEnabled() ? "Enabled" : "Disabled"
);
}
@Provides
@LazySingleton
public Worker getWorker(@Self DruidNode node, WorkerConfig config)

View File

@ -118,6 +118,7 @@ import org.apache.druid.server.http.SelfDiscoveryResource;
import org.apache.druid.server.initialization.ServerConfig;
import org.apache.druid.server.initialization.jetty.JettyServerInitUtils;
import org.apache.druid.server.initialization.jetty.JettyServerInitializer;
import org.apache.druid.server.metrics.ServiceStatusMonitor;
import org.apache.druid.server.metrics.TaskCountStatsProvider;
import org.apache.druid.server.metrics.TaskSlotCountStatsProvider;
import org.apache.druid.server.security.AuthConfig;
@ -361,7 +362,7 @@ public class CliOverlord extends ServerRunnable
@Provides
@LazySingleton
@Named("heartbeat")
@Named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING)
public Supplier<Map<String, Object>> getHeartbeatSupplier(TaskMaster taskMaster)
{
return () -> {

View File

@ -24,8 +24,11 @@ import com.github.rvesse.airline.annotations.Arguments;
import com.github.rvesse.airline.annotations.Command;
import com.github.rvesse.airline.annotations.Option;
import com.github.rvesse.airline.annotations.restrictions.Required;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.inject.Binder;
import com.google.inject.Inject;
@ -96,6 +99,7 @@ import org.apache.druid.java.util.common.lifecycle.Lifecycle;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator;
import org.apache.druid.metadata.input.InputSourceModule;
import org.apache.druid.query.DruidMetrics;
import org.apache.druid.query.QuerySegmentWalker;
import org.apache.druid.query.lookup.LookupModule;
import org.apache.druid.segment.handoff.CoordinatorBasedSegmentHandoffNotifierConfig;
@ -124,12 +128,14 @@ import org.apache.druid.server.http.SegmentListerResource;
import org.apache.druid.server.initialization.jetty.ChatHandlerServerModule;
import org.apache.druid.server.initialization.jetty.JettyServerInitializer;
import org.apache.druid.server.metrics.DataSourceTaskIdHolder;
import org.apache.druid.server.metrics.ServiceStatusMonitor;
import org.eclipse.jetty.server.Server;
import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
@ -259,6 +265,21 @@ public class CliPeon extends GuiceRunnable
}
}
@Provides
@LazySingleton
@Named(ServiceStatusMonitor.HEARTBEAT_TAGS_BINDING)
public Supplier<Map<String, Object>> heartbeatDimensions(Task task)
{
return Suppliers.ofInstance(
ImmutableMap.of(
DruidMetrics.TASK_ID, task.getId(),
DruidMetrics.DATASOURCE, task.getDataSource(),
DruidMetrics.TASK_TYPE, task.getType(),
DruidMetrics.GROUP_ID, task.getGroupId()
)
);
}
@Provides
@LazySingleton
public Task readTask(@Json ObjectMapper mapper, ExecutorLifecycleConfig config)