mirror of https://github.com/apache/druid.git
Add taskType dimension to task metrics (#5664)
This commit is contained in:
parent
a95ec92296
commit
67d0b0ee42
|
@ -105,22 +105,23 @@ These metrics are only available if the RealtimeMetricsMonitor is included in th
|
|||
|
||||
|Metric|Description|Dimensions|Normal Value|
|
||||
|------|-----------|----------|------------|
|
||||
|`ingest/events/thrownAway`|Number of events rejected because they are outside the windowPeriod.|dataSource.|0|
|
||||
|`ingest/events/unparseable`|Number of events rejected because the events are unparseable.|dataSource.|0|
|
||||
|`ingest/events/duplicate`|Number of events rejected because the events are duplicated.|dataSource.|0|
|
||||
|`ingest/events/processed`|Number of events successfully processed per emission period.|dataSource.|Equal to your # of events per emission period.|
|
||||
|`ingest/rows/output`|Number of Druid rows persisted.|dataSource.|Your # of events with rollup.|
|
||||
|`ingest/persists/count`|Number of times persist occurred.|dataSource.|Depends on configuration.|
|
||||
|`ingest/persists/time`|Milliseconds spent doing intermediate persist.|dataSource.|Depends on configuration. Generally a few minutes at most.|
|
||||
|`ingest/persists/cpu`|Cpu time in Nanoseconds spent on doing intermediate persist.|dataSource.|Depends on configuration. Generally a few minutes at most.|
|
||||
|`ingest/persists/backPressure`|Milliseconds spent creating persist tasks and blocking waiting for them to finish.|dataSource.|0 or very low|
|
||||
|`ingest/persists/failed`|Number of persists that failed.|dataSource.|0|
|
||||
|`ingest/handoff/failed`|Number of handoffs that failed.|dataSource.|0|
|
||||
|`ingest/merge/time`|Milliseconds spent merging intermediate segments|dataSource.|Depends on configuration. Generally a few minutes at most.|
|
||||
|`ingest/merge/cpu`|Cpu time in Nanoseconds spent on merging intermediate segments.|dataSource.|Depends on configuration. Generally a few minutes at most.|
|
||||
|`ingest/handoff/count`|Number of handoffs that happened.|dataSource.|Varies. Generally greater than 0 once every segment granular period if cluster operating normally|
|
||||
|`ingest/sink/count`|Number of sinks not handoffed.|dataSource.|1~3|
|
||||
|`ingest/events/messageGap`|Time gap between the data time in event and current system time.|dataSource.|Greater than 0, depends on the time carried in event |
|
||||
|`ingest/events/thrownAway`|Number of events rejected because they are outside the windowPeriod.|dataSource, taskId, taskType.|0|
|
||||
|`ingest/events/unparseable`|Number of events rejected because the events are unparseable.|dataSource, taskId, taskType.|0|
|
||||
|`ingest/events/duplicate`|Number of events rejected because the events are duplicated.|dataSource, taskId, taskType.|0|
|
||||
|`ingest/events/processed`|Number of events successfully processed per emission period.|dataSource, taskId, taskType.|Equal to your # of events per emission period.|
|
||||
|`ingest/rows/output`|Number of Druid rows persisted.|dataSource, taskId, taskType.|Your # of events with rollup.|
|
||||
|`ingest/persists/count`|Number of times persist occurred.|dataSource, taskId, taskType.|Depends on configuration.|
|
||||
|`ingest/persists/time`|Milliseconds spent doing intermediate persist.|dataSource, taskId, taskType.|Depends on configuration. Generally a few minutes at most.|
|
||||
|`ingest/persists/cpu`|Cpu time in Nanoseconds spent on doing intermediate persist.|dataSource, taskId, taskType.|Depends on configuration. Generally a few minutes at most.|
|
||||
|`ingest/persists/backPressure`|Milliseconds spent creating persist tasks and blocking waiting for them to finish.|dataSource, taskId, taskType.|0 or very low|
|
||||
|`ingest/persists/failed`|Number of persists that failed.|dataSource, taskId, taskType.|0|
|
||||
|`ingest/handoff/failed`|Number of handoffs that failed.|dataSource, taskId, taskType.|0|
|
||||
|`ingest/merge/time`|Milliseconds spent merging intermediate segments|dataSource, taskId, taskType.|Depends on configuration. Generally a few minutes at most.|
|
||||
|`ingest/merge/cpu`|Cpu time in Nanoseconds spent on merging intermediate segments.|dataSource, taskId, taskType.|Depends on configuration. Generally a few minutes at most.|
|
||||
|`ingest/handoff/count`|Number of handoffs that happened.|dataSource, taskId, taskType.|Varies. Generally greater than 0 once every segment granular period if the cluster is operating normally.|
|
||||
|`ingest/sink/count`|Number of sinks not handed off.|dataSource, taskId, taskType.|1~3|
|
||||
|`ingest/events/messageGap`|Time gap between the data time in event and current system time.|dataSource, taskId, taskType.|Greater than 0, depends on the time carried in event |
|
||||
|`ingest/kafka/lag`|Applicable for Kafka Indexing Service. Total lag between the offsets consumed by the Kafka indexing tasks and latest offsets in Kafka brokers across all partitions. Minimum emission period for this metric is a minute.|dataSource.|Greater than 0, should not be a very high number |
|
||||
|
||||
|
||||
|
@ -201,8 +202,10 @@ The following metric is only available if the EventReceiverFirehoseMonitor modul
|
|||
|
||||
|Metric|Description|Dimensions|Normal Value|
|
||||
|------|-----------|----------|------------|
|
||||
|`ingest/events/buffered`|Number of events queued in the EventReceiverFirehose's buffer|serviceName, dataSource, taskId, bufferCapacity.|Equal to current # of events in the buffer queue.|
|
||||
|`ingest/bytes/received`|Number of bytes received by the EventReceiverFirehose.|serviceName, dataSource, taskId.|Varies.|
|
||||
|`ingest/events/buffered`|Number of events queued in the EventReceiverFirehose's buffer|serviceName, dataSource, taskId, taskType, bufferCapacity.|Equal to current # of events in the buffer queue.|
|
||||
|`ingest/bytes/received`|Number of bytes received by the EventReceiverFirehose.|serviceName, dataSource, taskId, taskType.|Varies.|
|
||||
|
||||
## Sys
|
||||
|
||||
|
|
|
@ -55,6 +55,7 @@ import io.druid.indexing.appenderator.ActionBasedSegmentAllocator;
|
|||
import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker;
|
||||
import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport;
|
||||
import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData;
|
||||
import io.druid.indexing.common.TaskRealtimeMetricsMonitorBuilder;
|
||||
import io.druid.indexing.common.TaskReport;
|
||||
import io.druid.indexing.common.TaskStatus;
|
||||
import io.druid.indexing.common.TaskToolbox;
|
||||
|
@ -77,7 +78,6 @@ import io.druid.java.util.common.concurrent.Execs;
|
|||
import io.druid.java.util.common.guava.Sequence;
|
||||
import io.druid.java.util.common.parsers.ParseException;
|
||||
import io.druid.java.util.emitter.EmittingLogger;
|
||||
import io.druid.query.DruidMetrics;
|
||||
import io.druid.query.NoopQueryRunner;
|
||||
import io.druid.query.Query;
|
||||
import io.druid.query.QueryPlus;
|
||||
|
@ -87,7 +87,6 @@ import io.druid.segment.indexing.RealtimeIOConfig;
|
|||
import io.druid.segment.realtime.FireDepartment;
|
||||
import io.druid.segment.realtime.FireDepartmentMetrics;
|
||||
import io.druid.segment.realtime.FireDepartmentMetricsTaskMetricsGetter;
|
||||
import io.druid.segment.realtime.RealtimeMetricsMonitor;
|
||||
import io.druid.segment.realtime.appenderator.Appenderator;
|
||||
import io.druid.segment.realtime.appenderator.AppenderatorDriverAddResult;
|
||||
import io.druid.segment.realtime.appenderator.Appenderators;
|
||||
|
@ -513,12 +512,7 @@ public class KafkaIndexTask extends AbstractTask implements ChatHandler
|
|||
);
|
||||
fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
|
||||
metricsGetter = new FireDepartmentMetricsTaskMetricsGetter(fireDepartmentMetrics);
|
||||
toolbox.getMonitorScheduler().addMonitor(
|
||||
new RealtimeMetricsMonitor(
|
||||
ImmutableList.of(fireDepartmentForMetrics),
|
||||
ImmutableMap.of(DruidMetrics.TASK_ID, new String[]{getId()})
|
||||
)
|
||||
);
|
||||
toolbox.getMonitorScheduler().addMonitor(TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics));
|
||||
|
||||
LookupNodeService lookupNodeService = getContextValue(RealtimeIndexTask.CTX_KEY_LOOKUP_TIER) == null ?
|
||||
toolbox.getLookupNodeService() :
|
||||
|
@ -957,12 +951,7 @@ public class KafkaIndexTask extends AbstractTask implements ChatHandler
|
|||
);
|
||||
fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
|
||||
metricsGetter = new FireDepartmentMetricsTaskMetricsGetter(fireDepartmentMetrics);
|
||||
toolbox.getMonitorScheduler().addMonitor(
|
||||
new RealtimeMetricsMonitor(
|
||||
ImmutableList.of(fireDepartmentForMetrics),
|
||||
ImmutableMap.of(DruidMetrics.TASK_ID, new String[]{getId()})
|
||||
)
|
||||
);
|
||||
toolbox.getMonitorScheduler().addMonitor(TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics));
|
||||
|
||||
LookupNodeService lookupNodeService = getContextValue(RealtimeIndexTask.CTX_KEY_LOOKUP_TIER) == null ?
|
||||
toolbox.getLookupNodeService() :
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package io.druid.indexing.common;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import io.druid.indexing.common.task.Task;
|
||||
import io.druid.query.DruidMetrics;
|
||||
import io.druid.segment.realtime.FireDepartment;
|
||||
import io.druid.segment.realtime.RealtimeMetricsMonitor;
|
||||
|
||||
public class TaskRealtimeMetricsMonitorBuilder
|
||||
{
|
||||
private TaskRealtimeMetricsMonitorBuilder() {}
|
||||
|
||||
public static RealtimeMetricsMonitor build(Task task, FireDepartment fireDepartment)
|
||||
{
|
||||
return new RealtimeMetricsMonitor(
|
||||
ImmutableList.of(fireDepartment),
|
||||
ImmutableMap.of(
|
||||
DruidMetrics.TASK_ID, new String[]{task.getId()},
|
||||
DruidMetrics.TASK_TYPE, new String[]{task.getType()}
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
|
@ -26,7 +26,6 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
|||
import com.google.common.base.Optional;
|
||||
import com.google.common.base.Supplier;
|
||||
import com.google.common.base.Throwables;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.util.concurrent.Futures;
|
||||
|
@ -44,6 +43,7 @@ import io.druid.indexing.appenderator.ActionBasedSegmentAllocator;
|
|||
import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker;
|
||||
import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport;
|
||||
import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData;
|
||||
import io.druid.indexing.common.TaskRealtimeMetricsMonitorBuilder;
|
||||
import io.druid.indexing.common.TaskReport;
|
||||
import io.druid.indexing.common.TaskStatus;
|
||||
import io.druid.indexing.common.TaskToolbox;
|
||||
|
@ -58,7 +58,6 @@ import io.druid.java.util.common.concurrent.ListenableFutures;
|
|||
import io.druid.java.util.common.guava.CloseQuietly;
|
||||
import io.druid.java.util.common.parsers.ParseException;
|
||||
import io.druid.java.util.emitter.EmittingLogger;
|
||||
import io.druid.query.DruidMetrics;
|
||||
import io.druid.query.NoopQueryRunner;
|
||||
import io.druid.query.Query;
|
||||
import io.druid.query.QueryRunner;
|
||||
|
@ -249,12 +248,7 @@ public class AppenderatorDriverRealtimeIndexTask extends AbstractTask implements
|
|||
dataSchema, new RealtimeIOConfig(null, null, null), null
|
||||
);
|
||||
|
||||
final RealtimeMetricsMonitor metricsMonitor = new RealtimeMetricsMonitor(
|
||||
ImmutableList.of(fireDepartmentForMetrics),
|
||||
ImmutableMap.of(
|
||||
DruidMetrics.TASK_ID, new String[]{getId()}
|
||||
)
|
||||
);
|
||||
final RealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics);
|
||||
|
||||
this.metrics = fireDepartmentForMetrics.getMetrics();
|
||||
metricsGetter = new FireDepartmentMetricsTaskMetricsGetter(metrics);
|
||||
|
|
|
@ -30,7 +30,6 @@ import com.google.common.base.Optional;
|
|||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Throwables;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.hash.HashFunction;
|
||||
|
@ -48,6 +47,7 @@ import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker;
|
|||
import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport;
|
||||
import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData;
|
||||
import io.druid.indexing.common.TaskLock;
|
||||
import io.druid.indexing.common.TaskRealtimeMetricsMonitorBuilder;
|
||||
import io.druid.indexing.common.TaskReport;
|
||||
import io.druid.indexing.common.TaskStatus;
|
||||
import io.druid.indexing.common.TaskToolbox;
|
||||
|
@ -62,7 +62,6 @@ import io.druid.java.util.common.granularity.Granularity;
|
|||
import io.druid.java.util.common.guava.Comparators;
|
||||
import io.druid.java.util.common.logger.Logger;
|
||||
import io.druid.java.util.common.parsers.ParseException;
|
||||
import io.druid.query.DruidMetrics;
|
||||
import io.druid.segment.IndexSpec;
|
||||
import io.druid.segment.indexing.DataSchema;
|
||||
import io.druid.segment.indexing.IOConfig;
|
||||
|
@ -843,12 +842,8 @@ public class IndexTask extends AbstractTask implements ChatHandler
|
|||
buildSegmentsMetricsGetter = new FireDepartmentMetricsTaskMetricsGetter(buildSegmentsFireDepartmentMetrics);
|
||||
|
||||
if (toolbox.getMonitorScheduler() != null) {
|
||||
toolbox.getMonitorScheduler().addMonitor(
|
||||
new RealtimeMetricsMonitor(
|
||||
ImmutableList.of(fireDepartmentForMetrics),
|
||||
ImmutableMap.of(DruidMetrics.TASK_ID, new String[]{getId()})
|
||||
)
|
||||
);
|
||||
final RealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics);
|
||||
toolbox.getMonitorScheduler().addMonitor(metricsMonitor);
|
||||
}
|
||||
|
||||
final IndexIOConfig ioConfig = ingestionSchema.getIOConfig();
|
||||
|
|
|
@ -35,6 +35,7 @@ import io.druid.discovery.DruidNodeDiscoveryProvider;
|
|||
import io.druid.discovery.LookupNodeService;
|
||||
import io.druid.indexing.common.TaskLock;
|
||||
import io.druid.indexing.common.TaskLockType;
|
||||
import io.druid.indexing.common.TaskRealtimeMetricsMonitorBuilder;
|
||||
import io.druid.indexing.common.TaskStatus;
|
||||
import io.druid.indexing.common.TaskToolbox;
|
||||
import io.druid.indexing.common.actions.LockAcquireAction;
|
||||
|
@ -44,7 +45,6 @@ import io.druid.java.util.common.DateTimes;
|
|||
import io.druid.java.util.common.StringUtils;
|
||||
import io.druid.java.util.common.guava.CloseQuietly;
|
||||
import io.druid.java.util.emitter.EmittingLogger;
|
||||
import io.druid.query.DruidMetrics;
|
||||
import io.druid.query.FinalizeResultsQueryRunner;
|
||||
import io.druid.query.Query;
|
||||
import io.druid.query.QueryRunner;
|
||||
|
@ -320,12 +320,8 @@ public class RealtimeIndexTask extends AbstractTask
|
|||
tuningConfig
|
||||
);
|
||||
this.metrics = fireDepartment.getMetrics();
|
||||
final RealtimeMetricsMonitor metricsMonitor = new RealtimeMetricsMonitor(
|
||||
ImmutableList.of(fireDepartment),
|
||||
ImmutableMap.of(
|
||||
DruidMetrics.TASK_ID, new String[]{getId()}
|
||||
)
|
||||
);
|
||||
final RealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartment);
|
||||
|
||||
this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
|
||||
|
||||
// NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
|
||||
|
|
|
@ -358,7 +358,7 @@ public class ForkingTaskRunner implements TaskRunner, TaskLogStreamer
|
|||
}
|
||||
}
|
||||
|
||||
// Add dataSource and taskId for metrics or logging
|
||||
// Add dataSource, taskId and taskType for metrics or logging
|
||||
command.add(
|
||||
StringUtils.format(
|
||||
"-D%s%s=%s",
|
||||
|
@ -375,6 +375,14 @@ public class ForkingTaskRunner implements TaskRunner, TaskLogStreamer
|
|||
task.getId()
|
||||
)
|
||||
);
|
||||
command.add(
|
||||
StringUtils.format(
|
||||
"-D%s%s=%s",
|
||||
MonitorsConfig.METRIC_DIMENSION_PREFIX,
|
||||
DruidMetrics.TASK_TYPE,
|
||||
task.getType()
|
||||
)
|
||||
);
|
||||
|
||||
command.add(StringUtils.format("-Ddruid.host=%s", childHost));
|
||||
command.add(StringUtils.format("-Ddruid.port=%d", childPort));
|
||||
|
|
|
@ -48,7 +48,7 @@ public class EventReceiverFirehoseMonitor extends AbstractMonitor
|
|||
this.register = eventReceiverFirehoseRegister;
|
||||
this.dimensions = MonitorsConfig.extractDimensions(
|
||||
props,
|
||||
Lists.newArrayList(DruidMetrics.DATASOURCE, DruidMetrics.TASK_ID)
|
||||
Lists.newArrayList(DruidMetrics.DATASOURCE, DruidMetrics.TASK_ID, DruidMetrics.TASK_TYPE)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue