mirror of
https://github.com/apache/druid.git
synced 2025-02-09 03:24:55 +00:00
Improve task pause logging and metrics for streaming ingestion (#13313)
* Improve task pause logging and metrics for streaming ingestion * Add metrics doc * Fix spelling
This commit is contained in:
parent
b1eaf7a21f
commit
a738ac9ad7
@ -221,7 +221,8 @@ batch ingestion emit the following metrics. These metrics are deltas for each em
|
|||||||
|`ingest/sink/count`|Number of sinks not handed off.|dataSource, taskId, taskType.|1~3|
|
|`ingest/sink/count`|Number of sinks not handed off.|dataSource, taskId, taskType.|1~3|
|
||||||
|`ingest/events/messageGap`|Time gap in milliseconds between the latest ingested event timestamp and the current system timestamp of metrics emission. |dataSource, taskId, taskType.|Greater than 0, depends on the time carried in the event |
|
|`ingest/events/messageGap`|Time gap in milliseconds between the latest ingested event timestamp and the current system timestamp of metrics emission. |dataSource, taskId, taskType.|Greater than 0, depends on the time carried in the event |
|
||||||
|`ingest/notices/queueSize`|Number of pending notices to be processed by the coordinator|dataSource.|Typically 0 and occasionally in lower single digits. Should not be a very high number. |
|
|`ingest/notices/queueSize`|Number of pending notices to be processed by the coordinator|dataSource.|Typically 0 and occasionally in lower single digits. Should not be a very high number. |
|
||||||
|`ingest/notices/time`|Milliseconds taken to process a notice by the supervisor|dataSource, noticeType.| < 1s. |
|
|`ingest/notices/time`|Milliseconds taken to process a notice by the supervisor|dataSource| < 1s. |
|
||||||
|
|`ingest/pause/time`|Milliseconds spent by a task in a paused state without ingesting.|dataSource, taskId.| < 10 seconds.|
|
||||||
|
|
||||||
|
|
||||||
Note: If the JVM does not support CPU time measurement for the current thread, `ingest/merge/cpu` and `ingest/persists/cpu` will be 0.
|
Note: If the JVM does not support CPU time measurement for the current thread, `ingest/merge/cpu` and `ingest/persists/cpu` will be 0.
|
||||||
|
@ -1311,14 +1311,16 @@ public abstract class SeekableStreamIndexTaskRunner<PartitionIdType, SequenceOff
|
|||||||
status = Status.PAUSED;
|
status = Status.PAUSED;
|
||||||
hasPaused.signalAll();
|
hasPaused.signalAll();
|
||||||
|
|
||||||
log.debug("Received pause command, pausing ingestion until resumed.");
|
long pauseTime = System.currentTimeMillis();
|
||||||
|
log.info("Received pause command, pausing ingestion until resumed.");
|
||||||
while (pauseRequested) {
|
while (pauseRequested) {
|
||||||
shouldResume.await();
|
shouldResume.await();
|
||||||
}
|
}
|
||||||
|
|
||||||
status = Status.READING;
|
status = Status.READING;
|
||||||
shouldResume.signalAll();
|
shouldResume.signalAll();
|
||||||
log.debug("Received resume command, resuming ingestion.");
|
log.info("Received resume command, resuming ingestion.");
|
||||||
|
task.emitMetric(toolbox.getEmitter(), "ingest/pause/time", System.currentTimeMillis() - pauseTime);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3064,7 +3064,7 @@ public abstract class SeekableStreamSupervisor<PartitionIdType, SequenceOffsetTy
|
|||||||
}
|
}
|
||||||
|
|
||||||
log.info(
|
log.info(
|
||||||
"Setting endOffsets for tasks in taskGroup [%d] to %s and resuming",
|
"Setting endOffsets for tasks in taskGroup [%d] to %s",
|
||||||
taskGroup.groupId,
|
taskGroup.groupId,
|
||||||
endOffsets
|
endOffsets
|
||||||
);
|
);
|
||||||
@ -3083,6 +3083,8 @@ public abstract class SeekableStreamSupervisor<PartitionIdType, SequenceOffsetTy
|
|||||||
taskId
|
taskId
|
||||||
);
|
);
|
||||||
taskGroup.tasks.remove(taskId);
|
taskGroup.tasks.remove(taskId);
|
||||||
|
} else {
|
||||||
|
log.info("Successfully set endOffsets for task[%s] and resumed it", setEndOffsetTaskIds.get(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user