Add server view initialization metrics (#13716)

* Add server view init metrics

* Test coverage

* Rename metrics
This commit is contained in:
AmatyaAvadhanula 2023-02-07 20:02:00 +05:30 committed by GitHub
parent a0f8889f23
commit dcdae84888
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 92 additions and 31 deletions

View File

@ -35,6 +35,7 @@ import org.apache.druid.server.QueryLifecycleFactory;
import org.apache.druid.server.SegmentManager;
import org.apache.druid.server.coordination.DruidServerMetadata;
import org.apache.druid.server.coordination.ServerType;
import org.apache.druid.server.metrics.NoopServiceEmitter;
import org.apache.druid.server.security.Escalator;
import org.apache.druid.sql.calcite.planner.PlannerConfig;
import org.apache.druid.sql.calcite.planner.SegmentMetadataCacheConfig;
@ -89,7 +90,8 @@ public class DruidSchemaInternRowSignatureBenchmark
joinableFactory,
SegmentMetadataCacheConfig.create(),
escalator,
brokerInternalQueryConfig
brokerInternalQueryConfig,
new NoopServiceEmitter()
);
}

View File

@ -64,6 +64,8 @@ Metrics may have additional dimensions beyond those listed above.
|`sqlQuery/time`|Milliseconds taken to complete a SQL query.|`id`, `nativeQueryIds`, `dataSource`, `remoteAddress`, `success`|< 1s|
|`sqlQuery/planningTimeMs`|Milliseconds taken to plan a SQL to native query.|`id`, `nativeQueryIds`, `dataSource`, `remoteAddress`, `success`| |
|`sqlQuery/bytes`|Number of bytes returned in the SQL query response.|`id`, `nativeQueryIds`, `dataSource`, `remoteAddress`, `success`| |
|`init/serverview/time`|Time taken to initialize the broker server view. Useful to detect if brokers are taking too long to start.||Depends on the number of segments.|
|`init/metadatacache/time`|Time taken to initialize the broker segment metadata cache. Useful to detect if brokers are taking too long to start.||Depends on the number of segments.|
### Historical
@ -315,6 +317,7 @@ These metrics are for the Druid Coordinator and are reset each time the Coordina
|`metadata/kill/compaction/count`|Total number of compaction configurations that were automatically deleted from metadata store per each Coordinator kill compaction configuration duty run. Note that this metric is only emitted when `druid.coordinator.kill.compaction.on` is set to true.| |Varies|
|`metadata/kill/rule/count`|Total number of rules that were automatically deleted from metadata store per each Coordinator kill rule duty run. This metric can help adjust `druid.coordinator.kill.rule.durationToRetain` configuration based on whether more or less rules need to be deleted per cycle. Note that this metric is only emitted when `druid.coordinator.kill.rule.on` is set to true.| |Varies|
|`metadata/kill/datasource/count`|Total number of datasource metadata that were automatically deleted from metadata store per each Coordinator kill datasource duty run (Note: datasource metadata only exists for datasource created from supervisor). This metric can help adjust `druid.coordinator.kill.datasource.durationToRetain` configuration based on whether more or less datasource metadata need to be deleted per cycle. Note that this metric is only emitted when `druid.coordinator.kill.datasource.on` is set to true.| |Varies|
|`init/serverview/time`|Time taken to initialize the coordinator server view.||Depends on the number of segments.|
If `emitBalancingStats` is set to `true` in the Coordinator [dynamic configuration](../configuration/index.md#dynamic-configuration), then [log entries](../configuration/logging.md) for class `org.apache.druid.server.coordinator.duty.EmitClusterStatsAndMetrics` will have extra information on balancing decisions.

View File

@ -35,6 +35,7 @@ import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.lifecycle.LifecycleStart;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.http.client.HttpClient;
import org.apache.druid.query.QueryRunner;
import org.apache.druid.query.QueryToolChestWarehouse;
@ -181,10 +182,15 @@ public class BrokerServerView implements TimelineServerView
public void start() throws InterruptedException
{
if (segmentWatcherConfig.isAwaitInitializationOnStart()) {
final long startNanos = System.nanoTime();
log.debug("%s waiting for initialization.", getClass().getSimpleName());
final long startMillis = System.currentTimeMillis();
log.info("%s waiting for initialization.", getClass().getSimpleName());
awaitInitialization();
log.info("%s initialized in [%,d] ms.", getClass().getSimpleName(), (System.nanoTime() - startNanos) / 1000000);
final long endMillis = System.currentTimeMillis();
log.info("%s initialized in [%,d] ms.", getClass().getSimpleName(), endMillis - startMillis);
emitter.emit(ServiceMetricEvent.builder().build(
"init/serverview/time",
endMillis - startMillis
));
}
}

View File

@ -26,6 +26,8 @@ import org.apache.druid.guice.ManageLifecycle;
import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.lifecycle.LifecycleStart;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.query.DataSource;
import org.apache.druid.server.coordination.DruidServerMetadata;
import org.apache.druid.timeline.DataSegment;
@ -56,15 +58,18 @@ public class CoordinatorServerView implements InventoryView
private final CoordinatorSegmentWatcherConfig segmentWatcherConfig;
private final CountDownLatch initialized = new CountDownLatch(1);
private final ServiceEmitter emitter;
@Inject
public CoordinatorServerView(
ServerInventoryView baseView,
CoordinatorSegmentWatcherConfig segmentWatcherConfig
CoordinatorSegmentWatcherConfig segmentWatcherConfig,
ServiceEmitter emitter
)
{
this.baseView = baseView;
this.segmentWatcherConfig = segmentWatcherConfig;
this.emitter = emitter;
this.segmentLoadInfos = new HashMap<>();
this.timelines = new HashMap<>();
@ -117,7 +122,12 @@ public class CoordinatorServerView implements InventoryView
final long startMillis = System.currentTimeMillis();
log.info("%s waiting for initialization.", getClass().getSimpleName());
initialized.await();
log.info("%s initialized in [%,d] ms.", getClass().getSimpleName(), System.currentTimeMillis() - startMillis);
final long endMillis = System.currentTimeMillis();
log.info("%s initialized in [%,d] ms.", getClass().getSimpleName(), endMillis - startMillis);
emitter.emit(ServiceMetricEvent.builder().build(
"init/serverview/time",
endMillis - startMillis
));
}
}

View File

@ -174,7 +174,9 @@ public class CoordinatorClient
if (!response.getStatus().equals(HttpResponseStatus.OK)) {
throw new ISE(
"Error while fetching database segment data source segment status[%s] content[%s]",
"Error while fetching database segment[%s] in dataSource[%s] with status[%s] content[%s]",
segmentId,
dataSource,
response.getStatus(),
response.getContent()
);

View File

@ -668,6 +668,7 @@ public class BrokerServerViewTest extends CuratorTestBase
);
baseView.start();
brokerServerView.start();
}
private DataSegment dataSegmentWithIntervalAndVersion(String intervalStr, String version)

View File

@ -35,6 +35,7 @@ import org.apache.druid.segment.TestHelper;
import org.apache.druid.server.coordination.DruidServerMetadata;
import org.apache.druid.server.coordination.ServerType;
import org.apache.druid.server.initialization.ZkPathsConfig;
import org.apache.druid.server.metrics.NoopServiceEmitter;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.TimelineLookup;
import org.apache.druid.timeline.TimelineObjectHolder;
@ -333,10 +334,12 @@ public class CoordinatorServerViewTest extends CuratorTestBase
overlordServerView = new CoordinatorServerView(
baseView,
new CoordinatorSegmentWatcherConfig()
new CoordinatorSegmentWatcherConfig(),
new NoopServiceEmitter()
);
baseView.start();
overlordServerView.start();
}
private DataSegment dataSegmentWithIntervalAndVersion(String intervalStr, String version)

View File

@ -449,7 +449,7 @@ public class CuratorDruidCoordinatorTest extends CuratorTestBase
}
};
serverView = new CoordinatorServerView(baseView, new CoordinatorSegmentWatcherConfig());
serverView = new CoordinatorServerView(baseView, new CoordinatorSegmentWatcherConfig(), new NoopServiceEmitter());
baseView.start();

View File

@ -44,6 +44,8 @@ import org.apache.druid.java.util.common.guava.Yielders;
import org.apache.druid.java.util.common.lifecycle.LifecycleStart;
import org.apache.druid.java.util.common.lifecycle.LifecycleStop;
import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.query.GlobalTableDataSource;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.metadata.metadata.AllColumnIncluderator;
@ -114,6 +116,7 @@ public class SegmentMetadataCache
private final JoinableFactory joinableFactory;
private final ExecutorService cacheExec;
private final ExecutorService callbackExec;
private final ServiceEmitter emitter;
/**
* Map of DataSource -> DruidTable.
@ -217,7 +220,8 @@ public class SegmentMetadataCache
final JoinableFactory joinableFactory,
final SegmentMetadataCacheConfig config,
final Escalator escalator,
final BrokerInternalQueryConfig brokerInternalQueryConfig
final BrokerInternalQueryConfig brokerInternalQueryConfig,
final ServiceEmitter emitter
)
{
this.queryLifecycleFactory = Preconditions.checkNotNull(queryLifecycleFactory, "queryLifecycleFactory");
@ -229,6 +233,7 @@ public class SegmentMetadataCache
this.callbackExec = Execs.singleThreaded("DruidSchema-Callback-%d");
this.escalator = escalator;
this.brokerInternalQueryConfig = brokerInternalQueryConfig;
this.emitter = emitter;
initServerViewTimelineCallback(serverView);
}
@ -378,10 +383,15 @@ public class SegmentMetadataCache
startCacheExec();
if (config.isAwaitInitializationOnStart()) {
final long startNanos = System.nanoTime();
log.debug("%s waiting for initialization.", getClass().getSimpleName());
final long startMillis = System.currentTimeMillis();
log.info("%s waiting for initialization.", getClass().getSimpleName());
awaitInitialization();
log.info("%s initialized in [%,d] ms.", getClass().getSimpleName(), (System.nanoTime() - startNanos) / 1000000);
final long endMillis = System.currentTimeMillis();
log.info("%s initialized in [%,d] ms.", getClass().getSimpleName(), endMillis - startMillis);
emitter.emit(ServiceMetricEvent.builder().build(
"init/metadatacache/time",
endMillis - startMillis
));
}
}

View File

@ -37,6 +37,7 @@ import org.apache.druid.discovery.DruidNodeDiscoveryProvider;
import org.apache.druid.guice.LazySingleton;
import org.apache.druid.guice.LifecycleModule;
import org.apache.druid.guice.annotations.Json;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.query.lookup.LookupExtractorFactoryContainerProvider;
import org.apache.druid.query.lookup.LookupReferencesManager;
import org.apache.druid.segment.join.JoinableFactory;
@ -121,6 +122,7 @@ public class DruidCalciteSchemaModuleTest extends CalciteTestBase
binder.bind(ObjectMapper.class).annotatedWith(Json.class).toInstance(objectMapper);
binder.bindScope(LazySingleton.class, Scopes.SINGLETON);
binder.bind(LookupExtractorFactoryContainerProvider.class).toInstance(lookupReferencesManager);
binder.bind(ServiceEmitter.class).toInstance(new ServiceEmitter("", "", null));
},
new LifecycleModule(),
target);

View File

@ -28,6 +28,7 @@ import org.apache.druid.segment.join.MapJoinableFactory;
import org.apache.druid.segment.loading.SegmentLoader;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.SegmentManager;
import org.apache.druid.server.metrics.NoopServiceEmitter;
import org.apache.druid.server.security.NoopEscalator;
import org.apache.druid.sql.calcite.planner.SegmentMetadataCacheConfig;
import org.apache.druid.sql.calcite.util.CalciteTestBase;
@ -59,7 +60,8 @@ public class DruidSchemaNoDataInitTest extends CalciteTestBase
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
);
cache.start();

View File

@ -140,7 +140,8 @@ public class SegmentDataCacheConcurrencyTest extends SegmentMetadataCacheCommon
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
)
{
@Override
@ -249,7 +250,8 @@ public class SegmentDataCacheConcurrencyTest extends SegmentMetadataCacheCommon
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
)
{
@Override

View File

@ -59,6 +59,7 @@ import org.apache.druid.server.QueryLifecycleFactory;
import org.apache.druid.server.QueryResponse;
import org.apache.druid.server.coordination.DruidServerMetadata;
import org.apache.druid.server.coordination.ServerType;
import org.apache.druid.server.metrics.NoopServiceEmitter;
import org.apache.druid.server.security.Access;
import org.apache.druid.server.security.AllowAllAuthenticator;
import org.apache.druid.server.security.NoopEscalator;
@ -194,7 +195,8 @@ public class SegmentMetadataCacheTest extends SegmentMetadataCacheCommon
),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
)
{
@Override
@ -231,7 +233,8 @@ public class SegmentMetadataCacheTest extends SegmentMetadataCacheCommon
),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
)
{
boolean throwException = true;
@ -281,7 +284,8 @@ public class SegmentMetadataCacheTest extends SegmentMetadataCacheCommon
),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
)
{
@Override
@ -554,7 +558,8 @@ public class SegmentMetadataCacheTest extends SegmentMetadataCacheCommon
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
)
{
@Override
@ -596,7 +601,8 @@ public class SegmentMetadataCacheTest extends SegmentMetadataCacheCommon
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
)
{
@Override
@ -642,7 +648,8 @@ public class SegmentMetadataCacheTest extends SegmentMetadataCacheCommon
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
)
{
@Override
@ -685,7 +692,8 @@ public class SegmentMetadataCacheTest extends SegmentMetadataCacheCommon
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
)
{
@Override
@ -725,7 +733,8 @@ public class SegmentMetadataCacheTest extends SegmentMetadataCacheCommon
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
)
{
@Override
@ -782,7 +791,8 @@ public class SegmentMetadataCacheTest extends SegmentMetadataCacheCommon
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
)
{
@Override
@ -842,7 +852,8 @@ public class SegmentMetadataCacheTest extends SegmentMetadataCacheCommon
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
)
{
@Override
@ -876,7 +887,8 @@ public class SegmentMetadataCacheTest extends SegmentMetadataCacheCommon
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
)
{
@Override
@ -923,7 +935,8 @@ public class SegmentMetadataCacheTest extends SegmentMetadataCacheCommon
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
)
{
@Override
@ -1160,7 +1173,8 @@ public class SegmentMetadataCacheTest extends SegmentMetadataCacheCommon
),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
brokerInternalQueryConfig
brokerInternalQueryConfig,
new NoopServiceEmitter()
);
EasyMock.expect(factoryMock.factorize()).andReturn(lifecycleMock).once();

View File

@ -79,6 +79,7 @@ import org.apache.druid.server.SegmentManager;
import org.apache.druid.server.coordination.DruidServerMetadata;
import org.apache.druid.server.coordination.ServerType;
import org.apache.druid.server.coordinator.BytesAccumulatingResponseHandler;
import org.apache.druid.server.metrics.NoopServiceEmitter;
import org.apache.druid.server.security.Access;
import org.apache.druid.server.security.Action;
import org.apache.druid.server.security.AuthenticationResult;
@ -253,7 +254,8 @@ public class SystemSchemaTest extends CalciteTestBase
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
SEGMENT_CACHE_CONFIG_DEFAULT,
new NoopEscalator(),
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
);
cache.start();
cache.awaitInitialization();

View File

@ -46,6 +46,7 @@ import org.apache.druid.server.QueryLifecycleFactory;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.SegmentManager;
import org.apache.druid.server.log.NoopRequestLogger;
import org.apache.druid.server.metrics.NoopServiceEmitter;
import org.apache.druid.server.security.AuthConfig;
import org.apache.druid.server.security.AuthorizerMapper;
import org.apache.druid.sql.SqlLifecycleManager;
@ -209,7 +210,8 @@ public class QueryFrameworkUtils
createDefaultJoinableFactory(injector),
SegmentMetadataCacheConfig.create(),
CalciteTests.TEST_AUTHENTICATOR_ESCALATOR,
new BrokerInternalQueryConfig()
new BrokerInternalQueryConfig(),
new NoopServiceEmitter()
);
try {