diff --git a/server/pom.xml b/server/pom.xml index c62533763bb..14e1e574578 100644 --- a/server/pom.xml +++ b/server/pom.xml @@ -454,8 +454,9 @@ org/apache/druid/metadata/BasicDataSourceExt.class - + org/apache/druid/server/QueryResponse.class + org/apache/druid/curator/CuratorModule.class diff --git a/server/src/main/java/org/apache/druid/curator/CuratorModule.java b/server/src/main/java/org/apache/druid/curator/CuratorModule.java index 665024fd771..07c0ad84780 100644 --- a/server/src/main/java/org/apache/druid/curator/CuratorModule.java +++ b/server/src/main/java/org/apache/druid/curator/CuratorModule.java @@ -30,6 +30,7 @@ import org.apache.curator.framework.api.ACLProvider; import org.apache.curator.framework.imps.DefaultACLProvider; import org.apache.curator.retry.BoundedExponentialBackoffRetry; import org.apache.curator.shaded.com.google.common.base.Strings; +import org.apache.druid.concurrent.Threads; import org.apache.druid.guice.JsonConfigProvider; import org.apache.druid.guice.LazySingleton; import org.apache.druid.java.util.common.StringUtils; @@ -40,6 +41,7 @@ import org.apache.zookeeper.data.ACL; import java.nio.charset.StandardCharsets; import java.util.List; +import java.util.concurrent.TimeUnit; public class CuratorModule implements Module { @@ -49,6 +51,23 @@ public class CuratorModule implements Module static final int MAX_SLEEP_TIME_MS = 45000; private static final int MAX_RETRIES = 29; + private final boolean haltOnFailedStart; + + public CuratorModule() + { + this(true); + } + + /** + * Creates the module; the no-argument constructor delegates here with {@code true}. + * @param haltOnFailedStart if true, an unhandled Curator error also starts a daemon thread that sleeps 30 seconds and + * then halts the JVM with status 1, in case the lifecycle shutdown has not ended the process by then. 
+ */ + public CuratorModule(boolean haltOnFailedStart) + { + this.haltOnFailedStart = haltOnFailedStart; + } + @Override public void configure(Binder binder) { @@ -97,6 +116,29 @@ public class CuratorModule implements Module framework.getUnhandledErrorListenable().addListener((message, e) -> { log.error(e, "Unhandled error in Curator, stopping server."); + + if (haltOnFailedStart) { + final long startTime = System.currentTimeMillis(); + final Thread halter = new Thread( + () -> { + try { + Threads.sleepFor(30, TimeUnit.SECONDS); + } + catch (InterruptedException ignored) { + /* Ignored: if the sleep is interrupted, execution falls through to the halt below. */ + } + log.warn( + "Could not stop server within %,d millis after unhandled Curator error. Halting immediately.", + System.currentTimeMillis() - startTime + ); + Runtime.getRuntime().halt(1); + }, + "exiter-thread" + ); + halter.setDaemon(true); + halter.start(); + } + shutdown(lifecycle); }); diff --git a/server/src/test/java/org/apache/druid/curator/CuratorModuleTest.java b/server/src/test/java/org/apache/druid/curator/CuratorModuleTest.java index 20b86dc5966..5b88cb59f43 100644 --- a/server/src/test/java/org/apache/druid/curator/CuratorModuleTest.java +++ b/server/src/test/java/org/apache/druid/curator/CuratorModuleTest.java @@ -120,7 +120,7 @@ public final class CuratorModuleTest return new StartupInjectorBuilder() .add( new LifecycleModule(), - new CuratorModule(), + new CuratorModule(false), binder -> binder.bind(Properties.class).toInstance(props) ) .build();