add a new query laning metric to visualize lane assignment (#12111)

* add a new query laning metric to visualize lane assignment

* fixes the spotbugs check

* Update docs/operations/metrics.md

Co-authored-by: Benedict Jin <asdf2014@apache.org>

* Update server/src/main/java/org/apache/druid/server/QueryScheduler.java

Co-authored-by: Benedict Jin <asdf2014@apache.org>

* Update server/src/main/java/org/apache/druid/server/QueryScheduler.java

Co-authored-by: Benedict Jin <asdf2014@apache.org>

Co-authored-by: Benedict Jin <asdf2014@apache.org>
Sandeep 2022-03-04 15:21:17 +08:00 committed by GitHub
parent 36bc41855d
commit 61e1ffc7f7
4 changed files with 41 additions and 4 deletions

docs/operations/metrics.md

@@ -58,6 +58,7 @@ Available Metrics
 |`query/interrupted/count`|number of queries interrupted due to cancellation.|This metric is only available if the QueryCountStatsMonitor module is included.||
 |`query/timeout/count`|number of timed out queries.|This metric is only available if the QueryCountStatsMonitor module is included.||
 |`query/segments/count`|This metric is not enabled by default. See the `QueryMetrics` Interface for reference regarding enabling this metric. Number of segments that will be touched by the query. In the broker, it makes a plan to distribute the query to realtime tasks and historicals based on a snapshot of segment distribution state. If there are some segments moved after this snapshot is created, certain historicals and realtime tasks can report those segments as missing to the broker. The broker will re-send the query to the new servers that serve those segments after move. In this case, those segments can be counted more than once in this metric.|Varies.|
+|`query/priority`|Assigned lane and priority, only if a laning strategy is enabled. Refer to [Laning strategies](../configuration/index.md#laning-strategies).|lane, dataSource, type|0|
 |`sqlQuery/time`|Milliseconds taken to complete a SQL query.|id, nativeQueryIds, dataSource, remoteAddress, success.|< 1s|
 |`sqlQuery/bytes`|number of bytes returned in SQL query response.|id, nativeQueryIds, dataSource, remoteAddress, success.| |
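
To make the new row concrete, here is a minimal sketch of how this patch assembles and emits the `query/priority` event, using the same `ServiceMetricEvent`/`ServiceEmitter` calls shown in the `QueryScheduler` diff below. The `NoopEmitter`-backed emitter, the class name, and the hard-coded dimension values are illustrative stand-ins, not part of the commit.

```java
import org.apache.druid.java.util.emitter.core.NoopEmitter;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;

public class QueryPriorityMetricSketch
{
  public static void main(String[] args)
  {
    // In a real Broker/Historical the ServiceEmitter is injected; a NoopEmitter simply discards events.
    ServiceEmitter emitter = new ServiceEmitter("broker", "localhost", new NoopEmitter());

    // Same builder chain as in QueryScheduler: lane, dataSource, and type become dimensions.
    // The patch fills dataSource from query.getDataSource().getTableNames(); a plain string stands in here.
    ServiceMetricEvent.Builder builder = ServiceMetricEvent.builder()
        .setFeed("metrics")
        .setDimension("lane", "low")              // lane chosen by the laning strategy, or "default"
        .setDimension("dataSource", "wikipedia")  // datasource(s) read by the query
        .setDimension("type", "timeseries");      // native query type

    // The metric value is the computed priority (0 when no prioritization strategy applies).
    emitter.emit(builder.build("query/priority", 0));
  }
}
```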

server/src/main/java/org/apache/druid/server/QueryScheduler.java

@@ -32,6 +32,10 @@ import org.apache.druid.client.SegmentServerSelector;
 import org.apache.druid.java.util.common.concurrent.Execs;
 import org.apache.druid.java.util.common.guava.LazySequence;
 import org.apache.druid.java.util.common.guava.Sequence;
+import org.apache.druid.java.util.common.logger.Logger;
+import org.apache.druid.java.util.emitter.core.NoopEmitter;
+import org.apache.druid.java.util.emitter.service.ServiceEmitter;
+import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
 import org.apache.druid.query.Query;
 import org.apache.druid.query.QueryCapacityExceededException;
 import org.apache.druid.query.QueryContexts;
@@ -58,6 +62,7 @@ import java.util.Set;
  */
 public class QueryScheduler implements QueryWatcher
 {
+  private static final Logger LOGGER = new Logger(QueryScheduler.class);
   public static final int UNAVAILABLE = -1;
   public static final String TOTAL = "total";
   private final int totalCapacity;
@@ -86,12 +91,14 @@ public class QueryScheduler implements QueryWatcher
    * but it is OK in most cases since they will be cleaned up once the query is done.
    */
   private final SetMultimap<String, String> queryDatasources;
+  private final ServiceEmitter emitter;
 
   public QueryScheduler(
       int totalNumThreads,
       QueryPrioritizationStrategy prioritizationStrategy,
       QueryLaningStrategy laningStrategy,
-      ServerConfig serverConfig
+      ServerConfig serverConfig,
+      ServiceEmitter emitter
   )
   {
     this.prioritizationStrategy = prioritizationStrategy;
@@ -108,6 +115,21 @@
       this.totalCapacity = serverConfig.getNumThreads();
     }
     this.laneRegistry = BulkheadRegistry.of(getLaneConfigs(limitTotal));
+    this.emitter = emitter;
+  }
+
+  /**
+   * Keeping the old constructor as many test classes are dependent on this
+   */
+  @VisibleForTesting
+  public QueryScheduler(
+      int totalNumThreads,
+      QueryPrioritizationStrategy prioritizationStrategy,
+      QueryLaningStrategy laningStrategy,
+      ServerConfig serverConfig
+  )
+  {
+    this(totalNumThreads, prioritizationStrategy, laningStrategy, serverConfig, new ServiceEmitter("test", "localhost", new NoopEmitter()));
   }
 
   @Override
@@ -137,6 +159,12 @@
     Optional<Integer> priority = prioritizationStrategy.computePriority(queryPlus, segments);
     query = priority.map(query::withPriority).orElse(query);
     Optional<String> lane = laningStrategy.computeLane(queryPlus.withQuery(query), segments);
+    LOGGER.info("[%s] lane assigned to [%s] query with [%,d] priority", lane.orElse("default"), query.getType(), priority.orElse(Integer.valueOf(0)));
+    final ServiceMetricEvent.Builder builderUsr = ServiceMetricEvent.builder().setFeed("metrics")
+        .setDimension("lane", lane.orElse("default"))
+        .setDimension("dataSource", query.getDataSource().getTableNames())
+        .setDimension("type", query.getType());
+    emitter.emit(builderUsr.build("query/priority", priority.orElse(Integer.valueOf(0))));
     return lane.map(query::withLane).orElse(query);
   }
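
The `@VisibleForTesting` constructor kept above exists only so existing callers keep compiling: it delegates to the new five-argument constructor with a `NoopEmitter`-backed `ServiceEmitter`, so laning still works but the `query/priority` events are simply discarded. Below is a hedged sketch of the two construction paths; the strategy singletons and their `org.apache.druid.server.scheduling` package are assumptions not shown in this diff.

```java
import org.apache.druid.java.util.emitter.core.NoopEmitter;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.server.QueryScheduler;
import org.apache.druid.server.initialization.ServerConfig;
import org.apache.druid.server.scheduling.ManualQueryPrioritizationStrategy;
import org.apache.druid.server.scheduling.NoQueryLaningStrategy;

public class QuerySchedulerConstructionSketch
{
  public static void main(String[] args)
  {
    ServerConfig serverConfig = new ServerConfig();

    // Production-style wiring: an injected emitter, so query/priority reaches the metrics feed.
    QueryScheduler withMetrics = new QueryScheduler(
        8,                                          // totalNumThreads
        ManualQueryPrioritizationStrategy.INSTANCE, // assumed prioritization-strategy singleton
        NoQueryLaningStrategy.INSTANCE,             // assumed laning-strategy singleton
        serverConfig,
        new ServiceEmitter("broker", "localhost", new NoopEmitter())
    );

    // Test-only compatibility path kept by this patch: it delegates to the constructor above
    // with a no-op emitter, so pre-existing tests need no changes.
    QueryScheduler testOnly = new QueryScheduler(
        8,
        ManualQueryPrioritizationStrategy.INSTANCE,
        NoQueryLaningStrategy.INSTANCE,
        serverConfig
    );
  }
}
```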

server/src/main/java/org/apache/druid/server/QuerySchedulerProvider.java

@@ -23,12 +23,14 @@ import com.fasterxml.jackson.annotation.JacksonInject;
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.google.inject.Inject;
 import com.google.inject.Provider;
+import org.apache.druid.java.util.emitter.service.ServiceEmitter;
 import org.apache.druid.server.initialization.ServerConfig;
 
 public class QuerySchedulerProvider extends QuerySchedulerConfig implements Provider<QueryScheduler>
 {
   private final ServerConfig serverConfig;
+  private final ServiceEmitter emitter;
 
   /**
    * This needs to be both marked as guice injected to be bound correctly, and also marked with json creator and
@@ -36,14 +38,15 @@ public class QuerySchedulerProvider extends QuerySchedulerConfig implements Provider<QueryScheduler>
    */
   @Inject
   @JsonCreator
-  public QuerySchedulerProvider(@JacksonInject ServerConfig serverConfig)
+  public QuerySchedulerProvider(@JacksonInject ServerConfig serverConfig, @JacksonInject ServiceEmitter emitter)
   {
     this.serverConfig = serverConfig;
+    this.emitter = emitter;
   }
 
   @Override
   public QueryScheduler get()
   {
-    return new QueryScheduler(getNumThreads(), getPrioritizationStrategy(), getLaningStrategy(), serverConfig);
+    return new QueryScheduler(getNumThreads(), getPrioritizationStrategy(), getLaningStrategy(), serverConfig, emitter);
   }
 }
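
Because the provider now asks Guice and Jackson for a `ServiceEmitter` via `@JacksonInject`, any injector that materializes `QuerySchedulerProvider` must be able to supply one. The sketch below shows the Guice side; it mirrors the test-module change in `QuerySchedulerTest` shown next, and the module class name and no-op emitter are illustrative only.

```java
import com.google.inject.Binder;
import com.google.inject.Module;
import org.apache.druid.guice.JsonConfigProvider;
import org.apache.druid.guice.annotations.Global;
import org.apache.druid.java.util.emitter.core.NoopEmitter;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.server.QuerySchedulerProvider;
import org.apache.druid.server.initialization.ServerConfig;

public class QuerySchedulerBindingSketch implements Module
{
  @Override
  public void configure(Binder binder)
  {
    binder.bind(ServerConfig.class).toInstance(new ServerConfig());
    // Without a ServiceEmitter binding (here a no-op one), injecting QuerySchedulerProvider
    // now fails, since its constructor requires an emitter.
    binder.bind(ServiceEmitter.class).toInstance(new ServiceEmitter("test", "localhost", new NoopEmitter()));
    // Binds druid.query.scheduler.* properties to QuerySchedulerProvider, as in the test below.
    JsonConfigProvider.bind(binder, "druid.query.scheduler", QuerySchedulerProvider.class, Global.class);
  }
}
```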

server/src/test/java/org/apache/druid/server/QuerySchedulerTest.java

@@ -44,6 +44,8 @@ import org.apache.druid.java.util.common.guava.SequenceWrapper;
 import org.apache.druid.java.util.common.guava.Sequences;
 import org.apache.druid.java.util.common.guava.Yielder;
 import org.apache.druid.java.util.common.guava.Yielders;
+import org.apache.druid.java.util.emitter.core.NoopEmitter;
+import org.apache.druid.java.util.emitter.service.ServiceEmitter;
 import org.apache.druid.query.Query;
 import org.apache.druid.query.QueryCapacityExceededException;
 import org.apache.druid.query.QueryContexts;
@@ -697,13 +699,16 @@
         ImmutableList.of(
             binder -> {
               binder.bind(ServerConfig.class).toInstance(new ServerConfig());
+              binder.bind(ServiceEmitter.class).toInstance(new ServiceEmitter("test", "localhost", new NoopEmitter()));
               JsonConfigProvider.bind(binder, "druid.query.scheduler", QuerySchedulerProvider.class, Global.class);
             }
         )
     );
     ObjectMapper mapper = injector.getInstance(Key.get(ObjectMapper.class, Json.class));
     mapper.setInjectableValues(
-        new InjectableValues.Std().addValue(ServerConfig.class, injector.getInstance(ServerConfig.class))
+        new InjectableValues.Std()
+            .addValue(ServerConfig.class, injector.getInstance(ServerConfig.class))
+            .addValue(ServiceEmitter.class, injector.getInstance(ServiceEmitter.class))
     );
     return injector;
   }