From dd34691004ca87128e775136ac99be404191f897 Mon Sep 17 00:00:00 2001 From: Mingming Qiu Date: Thu, 21 Feb 2019 03:56:23 +0800 Subject: [PATCH] Coordinator await initialization before finishing startup (#6847) * Curator server inventory await initialization * address comments * print exception object in log * remove throws ISE * cachingCost awaitInitialization default to false --- docs/content/configuration/index.md | 6 ++- .../CoordinatorSegmentWatcherConfig.java | 33 ++++++++++++++++ .../druid/client/CoordinatorServerView.java | 23 ++++++++++- .../druid/client/HttpServerInventoryView.java | 13 +------ .../CachingCostBalancerStrategyConfig.java | 33 ++++++++++++++++ .../CachingCostBalancerStrategyFactory.java | 38 ++++++++++++++----- .../client/CoordinatorServerViewTest.java | 3 +- .../CuratorDruidCoordinatorTest.java | 3 +- .../org/apache/druid/cli/CliCoordinator.java | 11 +++++- 9 files changed, 135 insertions(+), 28 deletions(-) create mode 100644 server/src/main/java/org/apache/druid/client/CoordinatorSegmentWatcherConfig.java create mode 100644 server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyConfig.java diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index 639bb45cce2..22dd1aea59c 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -726,7 +726,8 @@ These Coordinator static configurations can be defined in the `coordinator/runti |`druid.coordinator.kill.period`|How often to send kill tasks to the indexing service. Value must be greater than `druid.coordinator.period.indexingPeriod`. Only applies if kill is turned on.|P1D (1 Day)| |`druid.coordinator.kill.durationToRetain`| Do not kill segments in last `durationToRetain`, must be greater or equal to 0. Only applies and MUST be specified if kill is turned on. 
Note that default value is invalid.|PT-1S (-1 seconds)| |`druid.coordinator.kill.maxSegments`|Kill at most n segments per kill task submission, must be greater than 0. Only applies and MUST be specified if kill is turned on. Note that default value is invalid.|0| -|`druid.coordinator.balancer.strategy`|Specify the type of balancing strategy that the Coordinator should use to distribute segments among the Historicals. `cachingCost` is logically equivalent to `cost` but is more CPU-efficient on large clusters and will replace `cost` in the future versions, users are invited to try it. Use `diskNormalized` to distribute segments among nodes so that the disks fill up uniformly and use `random` to randomly pick nodes to distribute segments.|`cost`| +|`druid.coordinator.balancer.strategy`|Specify the type of balancing strategy that the coordinator should use to distribute segments among the historicals. `cachingCost` is logically equivalent to `cost` but is more CPU-efficient on large clusters and will replace `cost` in the future versions, users are invited to try it. Use `diskNormalized` to distribute segments among nodes so that the disks fill up uniformly and use `random` to randomly pick nodes to distribute segments.|`cost`| +|`druid.coordinator.balancer.cachingCost.awaitInitialization`|Whether to wait for segment view initialization before creating the `cachingCost` balancing strategy. This property takes effect only when `druid.coordinator.balancer.strategy` is `cachingCost`. If set to 'true', the Coordinator will not start to assign segments until the segment view is initialized. If set to 'false', the Coordinator will fall back to using the `cost` balancing strategy only if the segment view is not initialized yet. 
Note that waiting for initialization may take a long time, since the `cachingCost` balancing strategy requires a lot of computation to build itself.|false| |`druid.coordinator.loadqueuepeon.repeatDelay`|The start and repeat delay for the loadqueuepeon , which manages the load and drop of segments.|PT0.050S (50 ms)| |`druid.coordinator.asOverlord.enabled`|Boolean value for whether this Coordinator node should act like an Overlord as well. This configuration allows users to simplify a druid cluster by not having to deploy any standalone Overlord nodes. If set to true, then Overlord console is available at `http://coordinator-host:port/console.html` and be sure to set `druid.coordinator.asOverlord.overlordService` also. See next.|false| |`druid.coordinator.asOverlord.overlordService`| Required, if `druid.coordinator.asOverlord.enabled` is `true`. This must be same value as `druid.service` on standalone Overlord nodes and `druid.selectors.indexing.serviceName` on Middle Managers.|NULL| @@ -735,7 +736,8 @@ These Coordinator static configurations can be defined in the `coordinator/runti |Property|Possible Values|Description|Default| |--------|---------------|-----------|-------| |`druid.serverview.type`|batch or http|Segment discovery method to use. "http" enables discovering segments using HTTP instead of zookeeper.|batch| -|`druid.coordinator.loadqueuepeon.type`|curator or http|Whether to use "http" or "curator" implementation to assign segment loads/drops to Historical|curator| +|`druid.coordinator.loadqueuepeon.type`|curator or http|Whether to use "http" or "curator" implementation to assign segment loads/drops to historical|curator| +|`druid.coordinator.segment.awaitInitializationOnStart`|true or false|Whether the Coordinator will wait for its view of segments to fully initialize before starting up. 
If set to 'true', the Coordinator's HTTP server will not start up, and the Coordinator will not announce itself as available, until the server view is initialized.|true| ###### Additional config when "http" loadqueuepeon is used |Property|Description|Default| diff --git a/server/src/main/java/org/apache/druid/client/CoordinatorSegmentWatcherConfig.java b/server/src/main/java/org/apache/druid/client/CoordinatorSegmentWatcherConfig.java new file mode 100644 index 00000000000..ed51012fe18 --- /dev/null +++ b/server/src/main/java/org/apache/druid/client/CoordinatorSegmentWatcherConfig.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.client; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class CoordinatorSegmentWatcherConfig +{ + @JsonProperty + private boolean awaitInitializationOnStart = true; + + public boolean isAwaitInitializationOnStart() + { + return awaitInitializationOnStart; + } +} diff --git a/server/src/main/java/org/apache/druid/client/CoordinatorServerView.java b/server/src/main/java/org/apache/druid/client/CoordinatorServerView.java index 3263bebcad0..91ebe381dc4 100644 --- a/server/src/main/java/org/apache/druid/client/CoordinatorServerView.java +++ b/server/src/main/java/org/apache/druid/client/CoordinatorServerView.java @@ -22,7 +22,9 @@ package org.apache.druid.client; import com.google.common.collect.Iterables; import com.google.common.collect.Ordering; import com.google.inject.Inject; +import org.apache.druid.guice.ManageLifecycle; import org.apache.druid.java.util.common.concurrent.Execs; +import org.apache.druid.java.util.common.lifecycle.LifecycleStart; import org.apache.druid.java.util.common.logger.Logger; import org.apache.druid.query.DataSource; import org.apache.druid.server.coordination.DruidServerMetadata; @@ -34,11 +36,13 @@ import org.apache.druid.timeline.partition.PartitionChunk; import java.util.Collection; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; /** * ServerView of coordinator for the state of segments being loaded in the cluster. 
*/ +@ManageLifecycle public class CoordinatorServerView implements InventoryView { private static final Logger log = new Logger(CoordinatorServerView.class); @@ -49,13 +53,18 @@ public class CoordinatorServerView implements InventoryView private final Map> timelines; private final ServerInventoryView baseView; + private final CoordinatorSegmentWatcherConfig segmentWatcherConfig; + + private final CountDownLatch initialized = new CountDownLatch(1); @Inject public CoordinatorServerView( - ServerInventoryView baseView + ServerInventoryView baseView, + CoordinatorSegmentWatcherConfig segmentWatcherConfig ) { this.baseView = baseView; + this.segmentWatcherConfig = segmentWatcherConfig; this.segmentLoadInfos = new HashMap<>(); this.timelines = new HashMap<>(); @@ -81,6 +90,7 @@ public class CoordinatorServerView implements InventoryView @Override public ServerView.CallbackAction segmentViewInitialized() { + initialized.countDown(); return ServerView.CallbackAction.CONTINUE; } } @@ -100,6 +110,17 @@ public class CoordinatorServerView implements InventoryView ); } + @LifecycleStart + public void start() throws InterruptedException + { + if (segmentWatcherConfig.isAwaitInitializationOnStart()) { + final long startMillis = System.currentTimeMillis(); + log.info("%s waiting for initialization.", getClass().getSimpleName()); + initialized.await(); + log.info("%s initialized in [%,d] ms.", getClass().getSimpleName(), System.currentTimeMillis() - startMillis); + } + } + private void removeServer(DruidServer server) { for (DataSegment segment : server.getSegments()) { diff --git a/server/src/main/java/org/apache/druid/client/HttpServerInventoryView.java b/server/src/main/java/org/apache/druid/client/HttpServerInventoryView.java index be0811769f2..1279b3af41c 100644 --- a/server/src/main/java/org/apache/druid/client/HttpServerInventoryView.java +++ b/server/src/main/java/org/apache/druid/client/HttpServerInventoryView.java @@ -64,7 +64,6 @@ import java.util.List; import 
java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.CountDownLatch; import java.util.concurrent.Executor; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; @@ -111,8 +110,6 @@ public class HttpServerInventoryView implements ServerInventoryView, FilteredSer private final ObjectMapper smileMapper; private final HttpServerInventoryViewConfig config; - private final CountDownLatch inventoryInitializationLatch = new CountDownLatch(1); - @Inject public HttpServerInventoryView( final @Smile ObjectMapper smileMapper, @@ -132,7 +129,7 @@ public class HttpServerInventoryView implements ServerInventoryView, FilteredSer @LifecycleStart - public void start() throws Exception + public void start() { synchronized (lifecycleLock) { if (!lifecycleLock.canStart()) { @@ -197,12 +194,6 @@ public class HttpServerInventoryView implements ServerInventoryView, FilteredSer lifecycleLock.exitStart(); } - log.info("Waiting for Server Inventory Initialization..."); - - while (!inventoryInitializationLatch.await(1, TimeUnit.MINUTES)) { - log.info("Still waiting for Server Inventory Initialization..."); - } - log.info("Started HttpServerInventoryView."); } } @@ -371,8 +362,6 @@ public class HttpServerInventoryView implements ServerInventoryView, FilteredSer } } - inventoryInitializationLatch.countDown(); - log.info("Calling SegmentCallback.segmentViewInitialized() for all callbacks."); runSegmentCallbacks( diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyConfig.java new file mode 100644 index 00000000000..eb0a668301d --- /dev/null +++ b/server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyConfig.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.server.coordinator; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class CachingCostBalancerStrategyConfig +{ + @JsonProperty + private boolean awaitInitialization = false; + + public boolean isAwaitInitialization() + { + return awaitInitialization; + } +} diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyFactory.java b/server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyFactory.java index 410a24d99e8..93df9ab738c 100644 --- a/server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyFactory.java +++ b/server/src/main/java/org/apache/druid/server/coordinator/CachingCostBalancerStrategyFactory.java @@ -35,10 +35,10 @@ import org.apache.druid.timeline.DataSegment; import java.util.concurrent.CancellationException; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.RejectedExecutionException; -import java.util.concurrent.atomic.AtomicBoolean; public class CachingCostBalancerStrategyFactory implements BalancerStrategyFactory { @@ 
-47,19 +47,19 @@ public class CachingCostBalancerStrategyFactory implements BalancerStrategyFacto /** Must be single-threaded, because {@link ClusterCostCache.Builder} and downstream builders are not thread-safe */ private final ExecutorService executor = Execs.singleThreaded("CachingCostBalancerStrategy-executor"); private final ClusterCostCache.Builder clusterCostCacheBuilder = ClusterCostCache.builder(); - /** - * Atomic is needed to use compareAndSet(true, true) construction below, that is linearizable with the write made from - * callback, that ensures visibility of the write made from callback. Neither plain field nor volatile field read - * ensure such visibility - */ - private final AtomicBoolean initialized = new AtomicBoolean(false); + + private final CountDownLatch initialized = new CountDownLatch(1); + private final CachingCostBalancerStrategyConfig config; @JsonCreator public CachingCostBalancerStrategyFactory( @JacksonInject ServerInventoryView serverInventoryView, - @JacksonInject Lifecycle lifecycle + @JacksonInject Lifecycle lifecycle, + @JacksonInject CachingCostBalancerStrategyConfig config ) throws Exception { + this.config = config; + // Adding to lifecycle dynamically because couldn't use @ManageLifecycle on the class, // see https://github.com/apache/incubator-druid/issues/4980 lifecycle.addMaybeStartManagedInstance(this); @@ -89,7 +89,7 @@ public class CachingCostBalancerStrategyFactory implements BalancerStrategyFacto @Override public ServerView.CallbackAction segmentViewInitialized() { - initialized.set(true); + initialized.countDown(); return ServerView.CallbackAction.CONTINUE; } } @@ -118,10 +118,28 @@ public class CachingCostBalancerStrategyFactory implements BalancerStrategyFacto executor.shutdownNow(); } + private boolean isInitialized() + { + return initialized.getCount() == 0; + } + @Override public BalancerStrategy createBalancerStrategy(final ListeningExecutorService exec) { - if (initialized.compareAndSet(true, true)) { + if 
(!isInitialized() && config.isAwaitInitialization()) { + try { + final long startMillis = System.currentTimeMillis(); + LOG.info("Waiting for segment view initialization before creating CachingCostBalancerStrategy."); + initialized.await(); + LOG.info("Segment view initialized in [%,d] ms.", System.currentTimeMillis() - startMillis); + } + catch (InterruptedException e) { + LOG.error(e, "Segment view initialization has been interrupted."); + Thread.currentThread().interrupt(); + } + } + + if (isInitialized()) { try { // Calling clusterCostCacheBuilder.build() in the same thread (executor's sole thread) where // clusterCostCacheBuilder is updated, to avoid problems with concurrent updates diff --git a/server/src/test/java/org/apache/druid/client/CoordinatorServerViewTest.java b/server/src/test/java/org/apache/druid/client/CoordinatorServerViewTest.java index 6550d9720af..f6e26989ead 100644 --- a/server/src/test/java/org/apache/druid/client/CoordinatorServerViewTest.java +++ b/server/src/test/java/org/apache/druid/client/CoordinatorServerViewTest.java @@ -328,7 +328,8 @@ public class CoordinatorServerViewTest extends CuratorTestBase }; overlordServerView = new CoordinatorServerView( - baseView + baseView, + new CoordinatorSegmentWatcherConfig() ); baseView.start(); diff --git a/server/src/test/java/org/apache/druid/server/coordinator/CuratorDruidCoordinatorTest.java b/server/src/test/java/org/apache/druid/server/coordinator/CuratorDruidCoordinatorTest.java index 68bce7adee2..94fecdb4d78 100644 --- a/server/src/test/java/org/apache/druid/server/coordinator/CuratorDruidCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/server/coordinator/CuratorDruidCoordinatorTest.java @@ -29,6 +29,7 @@ import org.apache.curator.framework.recipes.cache.PathChildrenCache; import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent; import org.apache.curator.utils.ZKPaths; import org.apache.druid.client.BatchServerInventoryView; +import 
org.apache.druid.client.CoordinatorSegmentWatcherConfig; import org.apache.druid.client.CoordinatorServerView; import org.apache.druid.client.DruidServer; import org.apache.druid.client.ImmutableDruidDataSource; @@ -472,7 +473,7 @@ public class CuratorDruidCoordinatorTest extends CuratorTestBase } }; - serverView = new CoordinatorServerView(baseView); + serverView = new CoordinatorServerView(baseView, new CoordinatorSegmentWatcherConfig()); baseView.start(); diff --git a/services/src/main/java/org/apache/druid/cli/CliCoordinator.java b/services/src/main/java/org/apache/druid/cli/CliCoordinator.java index 1e4daae01a2..3ba06cf34c2 100644 --- a/services/src/main/java/org/apache/druid/cli/CliCoordinator.java +++ b/services/src/main/java/org/apache/druid/cli/CliCoordinator.java @@ -29,6 +29,7 @@ import com.google.inject.name.Names; import io.airlift.airline.Command; import org.apache.curator.framework.CuratorFramework; import org.apache.druid.audit.AuditManager; +import org.apache.druid.client.CoordinatorSegmentWatcherConfig; import org.apache.druid.client.CoordinatorServerView; import org.apache.druid.client.HttpServerInventoryViewResource; import org.apache.druid.client.coordinator.Coordinator; @@ -58,6 +59,7 @@ import org.apache.druid.metadata.MetadataStorage; import org.apache.druid.metadata.MetadataStorageProvider; import org.apache.druid.server.audit.AuditManagerProvider; import org.apache.druid.server.coordinator.BalancerStrategyFactory; +import org.apache.druid.server.coordinator.CachingCostBalancerStrategyConfig; import org.apache.druid.server.coordinator.DruidCoordinator; import org.apache.druid.server.coordinator.DruidCoordinatorCleanupPendingSegments; import org.apache.druid.server.coordinator.DruidCoordinatorConfig; @@ -148,6 +150,12 @@ public class CliCoordinator extends ServerRunnable JsonConfigProvider.bind(binder, "druid.manager.rules", MetadataRuleManagerConfig.class); JsonConfigProvider.bind(binder, "druid.manager.lookups", 
LookupCoordinatorManagerConfig.class); JsonConfigProvider.bind(binder, "druid.coordinator.balancer", BalancerStrategyFactory.class); + JsonConfigProvider.bind(binder, "druid.coordinator.segment", CoordinatorSegmentWatcherConfig.class); + JsonConfigProvider.bind( + binder, + "druid.coordinator.balancer.cachingCost", + CachingCostBalancerStrategyConfig.class + ); binder.bind(RedirectFilter.class).in(LazySingleton.class); if (beOverlord) { @@ -169,11 +177,12 @@ public class CliCoordinator extends ServerRunnable .in(ManageLifecycle.class); binder.bind(IndexingServiceClient.class).to(HttpIndexingServiceClient.class).in(LazySingleton.class); - binder.bind(CoordinatorServerView.class).in(LazySingleton.class); binder.bind(LookupCoordinatorManager.class).in(LazySingleton.class); + binder.bind(CoordinatorServerView.class); binder.bind(DruidCoordinator.class); + LifecycleModule.register(binder, CoordinatorServerView.class); LifecycleModule.register(binder, MetadataStorage.class); LifecycleModule.register(binder, DruidCoordinator.class);