mirror of
https://github.com/apache/druid.git
synced 2025-02-17 07:25:02 +00:00
Added backpressure metric (#6335)
* Added backpressure metric
* Updated channelReadable to AtomicBoolean and fixed broken test
* Moved backpressure metric logic to NettyHttpClient
* Fixed placement of the backPressureDuration calculation
This commit is contained in:
parent
f09e718c68
commit
5a894f830b
@ -72,6 +72,7 @@ public class NettyHttpClient extends AbstractHttpClient
|
|||||||
private final ResourcePool<String, ChannelFuture> pool;
|
private final ResourcePool<String, ChannelFuture> pool;
|
||||||
private final HttpClientConfig.CompressionCodec compressionCodec;
|
private final HttpClientConfig.CompressionCodec compressionCodec;
|
||||||
private final Duration defaultReadTimeout;
|
private final Duration defaultReadTimeout;
|
||||||
|
private long backPressureStartTimeNs;
|
||||||
|
|
||||||
NettyHttpClient(
|
NettyHttpClient(
|
||||||
ResourcePool<String, ChannelFuture> pool,
|
ResourcePool<String, ChannelFuture> pool,
|
||||||
@ -212,9 +213,13 @@ public class NettyHttpClient extends AbstractHttpClient
|
|||||||
if (suspendWatermark >= 0 && resumeWatermark >= suspendWatermark) {
|
if (suspendWatermark >= 0 && resumeWatermark >= suspendWatermark) {
|
||||||
suspendWatermark = -1;
|
suspendWatermark = -1;
|
||||||
channel.setReadable(true);
|
channel.setReadable(true);
|
||||||
|
long backPressureDuration = System.nanoTime() - backPressureStartTimeNs;
|
||||||
log.debug("[%s] Resumed reads from channel (chunkNum = %,d).", requestDesc, resumeChunkNum);
|
log.debug("[%s] Resumed reads from channel (chunkNum = %,d).", requestDesc, resumeChunkNum);
|
||||||
|
return backPressureDuration;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return 0; //If we didn't resume, don't know if backpressure was happening
|
||||||
};
|
};
|
||||||
response = handler.handleResponse(httpResponse, trafficCop);
|
response = handler.handleResponse(httpResponse, trafficCop);
|
||||||
if (response.isFinished()) {
|
if (response.isFinished()) {
|
||||||
@ -271,6 +276,7 @@ public class NettyHttpClient extends AbstractHttpClient
|
|||||||
suspendWatermark = Math.max(suspendWatermark, currentChunkNum);
|
suspendWatermark = Math.max(suspendWatermark, currentChunkNum);
|
||||||
if (suspendWatermark > resumeWatermark) {
|
if (suspendWatermark > resumeWatermark) {
|
||||||
channel.setReadable(false);
|
channel.setReadable(false);
|
||||||
|
backPressureStartTimeNs = System.nanoTime();
|
||||||
log.debug("[%s] Suspended reads from channel (chunkNum = %,d).", requestDesc, currentChunkNum);
|
log.debug("[%s] Suspended reads from channel (chunkNum = %,d).", requestDesc, currentChunkNum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -91,7 +91,8 @@ public interface HttpResponseHandler<IntermediateType, FinalType>
|
|||||||
* Call this to resume reading after you have suspended it.
|
* Call this to resume reading after you have suspended it.
|
||||||
*
|
*
|
||||||
* @param chunkNum chunk number corresponding to the handleChunk() or handleResponse() call from which you
|
* @param chunkNum chunk number corresponding to the handleChunk() or handleResponse() call from which you
|
||||||
|
* @return time in nanoseconds that backpressure was applied (channel was not readable), or 0 if this call did not resume reads
|
||||||
*/
|
*/
|
||||||
void resume(long chunkNum);
|
long resume(long chunkNum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -243,6 +243,12 @@ public class DefaultQueryMetrics<QueryType extends Query<?>> implements QueryMet
|
|||||||
return reportMillisTimeMetric("query/node/ttfb", timeNs);
|
return reportMillisTimeMetric("query/node/ttfb", timeNs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public QueryMetrics<QueryType> reportBackPressureTime(long timeNs)
|
||||||
|
{
|
||||||
|
return reportMillisTimeMetric("query/node/backpressure", timeNs);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public QueryMetrics<QueryType> reportNodeTime(long timeNs)
|
public QueryMetrics<QueryType> reportNodeTime(long timeNs)
|
||||||
{
|
{
|
||||||
|
@ -278,6 +278,11 @@ public interface QueryMetrics<QueryType extends Query<?>>
|
|||||||
*/
|
*/
|
||||||
QueryMetrics<QueryType> reportNodeTimeToFirstByte(long timeNs);
|
QueryMetrics<QueryType> reportNodeTimeToFirstByte(long timeNs);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Registers "time that channel is unreadable (backpressure)" metric.
|
||||||
|
*/
|
||||||
|
QueryMetrics<QueryType> reportBackPressureTime(long timeNs);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Registers "node time" metric.
|
* Registers "node time" metric.
|
||||||
*/
|
*/
|
||||||
|
@ -208,6 +208,12 @@ public class DefaultSearchQueryMetrics implements SearchQueryMetrics
|
|||||||
return delegateQueryMetrics.reportNodeTimeToFirstByte(timeNs);
|
return delegateQueryMetrics.reportNodeTimeToFirstByte(timeNs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public QueryMetrics reportBackPressureTime(long timeNs)
|
||||||
|
{
|
||||||
|
return delegateQueryMetrics.reportBackPressureTime(timeNs);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public QueryMetrics reportNodeTime(long timeNs)
|
public QueryMetrics reportNodeTime(long timeNs)
|
||||||
{
|
{
|
||||||
|
@ -207,6 +207,12 @@ public class DefaultSelectQueryMetrics implements SelectQueryMetrics
|
|||||||
return delegateQueryMetrics.reportNodeTimeToFirstByte(timeNs);
|
return delegateQueryMetrics.reportNodeTimeToFirstByte(timeNs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public QueryMetrics reportBackPressureTime(long timeNs)
|
||||||
|
{
|
||||||
|
return delegateQueryMetrics.reportBackPressureTime(timeNs);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public QueryMetrics reportNodeTime(long timeNs)
|
public QueryMetrics reportNodeTime(long timeNs)
|
||||||
{
|
{
|
||||||
|
@ -152,5 +152,10 @@ public class DefaultQueryMetricsTest
|
|||||||
actualEvent = cachingEmitter.getLastEmittedEvent().toMap();
|
actualEvent = cachingEmitter.getLastEmittedEvent().toMap();
|
||||||
Assert.assertEquals("query/node/bytes", actualEvent.get("metric"));
|
Assert.assertEquals("query/node/bytes", actualEvent.get("metric"));
|
||||||
Assert.assertEquals(10L, actualEvent.get("value"));
|
Assert.assertEquals(10L, actualEvent.get("value"));
|
||||||
|
|
||||||
|
queryMetrics.reportBackPressureTime(11000001).emit(serviceEmitter);
|
||||||
|
actualEvent = cachingEmitter.getLastEmittedEvent().toMap();
|
||||||
|
Assert.assertEquals("query/node/backpressure", actualEvent.get("metric"));
|
||||||
|
Assert.assertEquals(11L, actualEvent.get("value"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -203,6 +203,7 @@ public class DirectDruidClient<T> implements QueryRunner<T>
|
|||||||
{
|
{
|
||||||
private final AtomicLong totalByteCount = new AtomicLong(0);
|
private final AtomicLong totalByteCount = new AtomicLong(0);
|
||||||
private final AtomicLong queuedByteCount = new AtomicLong(0);
|
private final AtomicLong queuedByteCount = new AtomicLong(0);
|
||||||
|
private final AtomicLong channelSuspendedTime = new AtomicLong(0);
|
||||||
private final BlockingQueue<InputStreamHolder> queue = new LinkedBlockingQueue<>();
|
private final BlockingQueue<InputStreamHolder> queue = new LinkedBlockingQueue<>();
|
||||||
private final AtomicBoolean done = new AtomicBoolean(false);
|
private final AtomicBoolean done = new AtomicBoolean(false);
|
||||||
private final AtomicReference<String> fail = new AtomicReference<>();
|
private final AtomicReference<String> fail = new AtomicReference<>();
|
||||||
@ -244,8 +245,9 @@ public class DirectDruidClient<T> implements QueryRunner<T>
|
|||||||
|
|
||||||
final long currentQueuedByteCount = queuedByteCount.addAndGet(-holder.getLength());
|
final long currentQueuedByteCount = queuedByteCount.addAndGet(-holder.getLength());
|
||||||
if (usingBackpressure && currentQueuedByteCount < maxQueuedBytes) {
|
if (usingBackpressure && currentQueuedByteCount < maxQueuedBytes) {
|
||||||
Preconditions.checkNotNull(trafficCopRef.get(), "No TrafficCop, how can this be?")
|
long backPressureTime = Preconditions.checkNotNull(trafficCopRef.get(), "No TrafficCop, how can this be?")
|
||||||
.resume(holder.getChunkNum());
|
.resume(holder.getChunkNum());
|
||||||
|
channelSuspendedTime.addAndGet(backPressureTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
return holder.getStream();
|
return holder.getStream();
|
||||||
@ -382,6 +384,11 @@ public class DirectDruidClient<T> implements QueryRunner<T>
|
|||||||
QueryMetrics<? super Query<T>> responseMetrics = acquireResponseMetrics();
|
QueryMetrics<? super Query<T>> responseMetrics = acquireResponseMetrics();
|
||||||
responseMetrics.reportNodeTime(nodeTimeNs);
|
responseMetrics.reportNodeTime(nodeTimeNs);
|
||||||
responseMetrics.reportNodeBytes(totalByteCount.get());
|
responseMetrics.reportNodeBytes(totalByteCount.get());
|
||||||
|
|
||||||
|
if (usingBackpressure) {
|
||||||
|
responseMetrics.reportBackPressureTime(channelSuspendedTime.get());
|
||||||
|
}
|
||||||
|
|
||||||
responseMetrics.emit(emitter);
|
responseMetrics.emit(emitter);
|
||||||
synchronized (done) {
|
synchronized (done) {
|
||||||
try {
|
try {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user