diff --git a/docs/operations/metrics.md b/docs/operations/metrics.md index d907e4c7eea..95099406f28 100644 --- a/docs/operations/metrics.md +++ b/docs/operations/metrics.md @@ -64,12 +64,12 @@ Metrics may have additional dimensions beyond those listed above. |`sqlQuery/time`|Milliseconds taken to complete a SQL query.|`id`, `nativeQueryIds`, `dataSource`, `remoteAddress`, `success`, `engine`|< 1s| |`sqlQuery/planningTimeMs`|Milliseconds taken to plan a SQL to native query.|`id`, `nativeQueryIds`, `dataSource`, `remoteAddress`, `success`, `engine`| | |`sqlQuery/bytes`|Number of bytes returned in the SQL query response.|`id`, `nativeQueryIds`, `dataSource`, `remoteAddress`, `success`, `engine`| | -|`init/serverview/time`|Time taken to initialize the broker server view. Useful to detect if brokers are taking too long to start.||Depends on the number of segments.| -|`init/metadatacache/time`|Time taken to initialize the broker segment metadata cache. Useful to detect if brokers are taking too long to start||Depends on the number of segments.| -|`segment/metadatacache/refresh/count`|Number of segments to refresh in broker segment metadata cache.|`dataSource`| -|`segment/metadatacache/refresh/time`|Time taken to refresh segments in broker segment metadata cache.|`dataSource`| -|`segment/serverview/sync/healthy`|Sync status of the Broker with a segment-loading server such as a Historical or Peon. Emitted only when [HTTP-based server view](../configuration/index.md#segment-management) is enabled. This metric can be used in conjunction with `segment/serverview/sync/unstableTime` to debug slow startup of Brokers.|`server`, `tier`|1 for fully synced servers, 0 otherwise| -|`segment/serverview/sync/unstableTime`|Time in milliseconds for which the Broker has been failing to sync with a segment-loading server. 
Emitted only when [HTTP-based server view](../configuration/index.md#segment-management) is enabled.|`server`, `tier`|Not emitted for synced servers.| +|`serverview/init/time`|Time in milliseconds taken to initialize the broker server view. Useful to detect if brokers are taking too long to start.||Depends on the number of segments.| +|`metadatacache/init/time`|Time in milliseconds taken to initialize the broker segment metadata cache. Useful to detect if brokers are taking too long to start.||Depends on the number of segments.| +|`metadatacache/refresh/count`|Number of segments to refresh in broker segment metadata cache.|`dataSource`| | +|`metadatacache/refresh/time`|Time in milliseconds taken to refresh segments in broker segment metadata cache.|`dataSource`| | +|`serverview/sync/healthy`|Sync status of the Broker with a segment-loading server such as a Historical or Peon. Emitted only when [HTTP-based server view](../configuration/index.md#segment-management) is enabled. This metric can be used in conjunction with `serverview/sync/unstableTime` to debug slow startup of Brokers.|`server`, `tier`|1 for fully synced servers, 0 otherwise| +|`serverview/sync/unstableTime`|Time in milliseconds for which the Broker has been failing to sync with a segment-loading server. Emitted only when [HTTP-based server view](../configuration/index.md#segment-management) is enabled.|`server`, `tier`|Not emitted for synced servers.| ### Historical @@ -328,9 +328,9 @@ These metrics are for the Druid Coordinator and are reset each time the Coordina |`metadata/kill/compaction/count`|Total number of compaction configurations that were automatically deleted from metadata store per each Coordinator kill compaction configuration duty run. This metric is only emitted when `druid.coordinator.kill.compaction.on` is set to true.| |Varies| |`metadata/kill/rule/count`|Total number of rules that were automatically deleted from metadata store per each Coordinator kill rule duty run. 
This metric can help adjust `druid.coordinator.kill.rule.durationToRetain` configuration based on whether more or less rules need to be deleted per cycle. This metric is only emitted when `druid.coordinator.kill.rule.on` is set to true.| |Varies| |`metadata/kill/datasource/count`|Total number of datasource metadata that were automatically deleted from metadata store per each Coordinator kill datasource duty run. Note that datasource metadata only exists for datasource created from supervisor. This metric can help adjust `druid.coordinator.kill.datasource.durationToRetain` configuration based on whether more or less datasource metadata need to be deleted per cycle. This metric is only emitted when `druid.coordinator.kill.datasource.on` is set to true.| |Varies| -|`init/serverview/time`|Time taken to initialize the coordinator server view.||Depends on the number of segments.| -|`segment/serverview/sync/healthy`|Sync status of the Coordinator with a segment-loading server such as a Historical or Peon. Emitted only when [HTTP-based server view](../configuration/index.md#segment-management) is enabled. You can use this metric in conjunction with `segment/serverview/sync/unstableTime` to debug slow startup of the Coordinator.|`server`, `tier`|1 for fully synced servers, 0 otherwise| -|`segment/serverview/sync/unstableTime`|Time in milliseconds for which the Coordinator has been failing to sync with a segment-loading server. Emitted only when [HTTP-based server view](../configuration/index.md#segment-management) is enabled.|`server`, `tier`|Not emitted for synced servers.| +|`serverview/init/time`|Time in milliseconds taken to initialize the coordinator server view.||Depends on the number of segments.| +|`serverview/sync/healthy`|Sync status of the Coordinator with a segment-loading server such as a Historical or Peon. Emitted only when [HTTP-based server view](../configuration/index.md#segment-management) is enabled. 
You can use this metric in conjunction with `serverview/sync/unstableTime` to debug slow startup of the Coordinator.|`server`, `tier`|1 for fully synced servers, 0 otherwise| +|`serverview/sync/unstableTime`|Time in milliseconds for which the Coordinator has been failing to sync with a segment-loading server. Emitted only when [HTTP-based server view](../configuration/index.md#segment-management) is enabled.|`server`, `tier`|Not emitted for synced servers.| ## General Health diff --git a/server/src/main/java/org/apache/druid/client/BrokerServerView.java b/server/src/main/java/org/apache/druid/client/BrokerServerView.java index a10dd285390..9c35b0c9030 100644 --- a/server/src/main/java/org/apache/druid/client/BrokerServerView.java +++ b/server/src/main/java/org/apache/druid/client/BrokerServerView.java @@ -185,7 +185,7 @@ public class BrokerServerView implements TimelineServerView final long endMillis = System.currentTimeMillis(); log.info("BrokerServerView initialized in [%,d] ms.", endMillis - startMillis); emitter.emit(ServiceMetricEvent.builder().build( - "init/serverview/time", + "serverview/init/time", endMillis - startMillis )); } diff --git a/server/src/main/java/org/apache/druid/client/CoordinatorServerView.java b/server/src/main/java/org/apache/druid/client/CoordinatorServerView.java index cc1159fe5a5..56ad1a78872 100644 --- a/server/src/main/java/org/apache/druid/client/CoordinatorServerView.java +++ b/server/src/main/java/org/apache/druid/client/CoordinatorServerView.java @@ -125,7 +125,7 @@ public class CoordinatorServerView implements InventoryView final long endMillis = System.currentTimeMillis(); log.info("%s initialized in [%,d] ms.", getClass().getSimpleName(), endMillis - startMillis); emitter.emit(ServiceMetricEvent.builder().build( - "init/serverview/time", + "serverview/init/time", endMillis - startMillis )); } diff --git a/server/src/main/java/org/apache/druid/client/HttpServerInventoryView.java 
b/server/src/main/java/org/apache/druid/client/HttpServerInventoryView.java index 2c30c168132..7b67e8802b3 100644 --- a/server/src/main/java/org/apache/druid/client/HttpServerInventoryView.java +++ b/server/src/main/java/org/apache/druid/client/HttpServerInventoryView.java @@ -495,12 +495,12 @@ public class HttpServerInventoryView implements ServerInventoryView, FilteredSer final boolean isSynced = serverHolder.syncer.isSyncedSuccessfully(); serviceEmitter.emit( - eventBuilder.build("segment/serverview/sync/healthy", isSynced ? 1 : 0) + eventBuilder.build("serverview/sync/healthy", isSynced ? 1 : 0) ); final long unstableTimeMillis = serverHolder.syncer.getUnstableTimeMillis(); if (unstableTimeMillis > 0) { serviceEmitter.emit( - eventBuilder.build("segment/serverview/sync/unstableTime", unstableTimeMillis) + eventBuilder.build("serverview/sync/unstableTime", unstableTimeMillis) ); } }); diff --git a/server/src/test/java/org/apache/druid/client/HttpServerInventoryViewTest.java b/server/src/test/java/org/apache/druid/client/HttpServerInventoryViewTest.java index 70ec2b2fa4a..76f23b5481a 100644 --- a/server/src/test/java/org/apache/druid/client/HttpServerInventoryViewTest.java +++ b/server/src/test/java/org/apache/druid/client/HttpServerInventoryViewTest.java @@ -77,8 +77,8 @@ public class HttpServerInventoryViewTest private static final String EXEC_NAME_PREFIX = "InventoryViewTest"; - private static final String METRIC_SUCCESS = "segment/serverview/sync/healthy"; - private static final String METRIC_UNSTABLE_TIME = "segment/serverview/sync/unstableTime"; + private static final String METRIC_SUCCESS = "serverview/sync/healthy"; + private static final String METRIC_UNSTABLE_TIME = "serverview/sync/unstableTime"; private StubServiceEmitter serviceEmitter; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCache.java b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCache.java index 278a9010ebe..37ff3bda313 100644 --- 
a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCache.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCache.java @@ -397,7 +397,7 @@ public class SegmentMetadataCache final long endMillis = System.currentTimeMillis(); log.info("%s initialized in [%,d] ms.", getClass().getSimpleName(), endMillis - startMillis); emitter.emit(ServiceMetricEvent.builder().build( - "init/metadatacache/time", + "metadatacache/init/time", endMillis - startMillis )); } @@ -722,7 +722,7 @@ public class SegmentMetadataCache final ServiceMetricEvent.Builder builder = new ServiceMetricEvent.Builder().setDimension(DruidMetrics.DATASOURCE, dataSource); - emitter.emit(builder.build("segment/metadatacache/refresh/count", segments.size())); + emitter.emit(builder.build("metadatacache/refresh/count", segments.size())); // Segment id string -> SegmentId object. final Map segmentIdMap = Maps.uniqueIndex(segments, SegmentId::toString); @@ -793,7 +793,7 @@ public class SegmentMetadataCache long refreshDurationMillis = stopwatch.elapsed(TimeUnit.MILLISECONDS); - emitter.emit(builder.build("segment/metadatacache/refresh/time", refreshDurationMillis)); + emitter.emit(builder.build("metadatacache/refresh/time", refreshDurationMillis)); log.debug( "Refreshed metadata for dataSource [%s] in %,d ms (%d segments queried, %d segments left).", diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCacheTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCacheTest.java index 3d3d73741ad..b5414905489 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCacheTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCacheTest.java @@ -1492,8 +1492,8 @@ public class SegmentMetadataCacheTest extends SegmentMetadataCacheCommon Assert.assertTrue(addSegmentLatch.await(1, TimeUnit.SECONDS)); 
schema.refresh(segments.stream().map(DataSegment::getId).collect(Collectors.toSet()), Sets.newHashSet(datasource)); - emitter.verifyEmitted("segment/metadatacache/refresh/time", ImmutableMap.of(DruidMetrics.DATASOURCE, datasource), 1); - emitter.verifyEmitted("segment/metadatacache/refresh/count", ImmutableMap.of(DruidMetrics.DATASOURCE, datasource), 1); + emitter.verifyEmitted("metadatacache/refresh/time", ImmutableMap.of(DruidMetrics.DATASOURCE, datasource), 1); + emitter.verifyEmitted("metadatacache/refresh/count", ImmutableMap.of(DruidMetrics.DATASOURCE, datasource), 1); } private static DataSegment newSegment(String datasource, int partitionId)