diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpChannel.java b/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpChannel.java index 981a417449f..73135c2a145 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpChannel.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpChannel.java @@ -21,11 +21,11 @@ package org.elasticsearch.http.netty4; import io.netty.channel.Channel; import io.netty.channel.ChannelPromise; +import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.ActionListener; import org.elasticsearch.common.concurrent.CompletableContext; import org.elasticsearch.http.HttpChannel; import org.elasticsearch.http.HttpResponse; -import org.elasticsearch.transport.netty4.Netty4Utils; import java.net.InetSocketAddress; @@ -42,7 +42,7 @@ public class Netty4HttpChannel implements HttpChannel { } else { Throwable cause = f.cause(); if (cause instanceof Error) { - Netty4Utils.maybeDie(cause); + ExceptionsHelper.maybeDieOnAnotherThread(cause); closeContext.completeExceptionally(new Exception(cause)); } else { closeContext.completeExceptionally((Exception) cause); @@ -59,7 +59,7 @@ public class Netty4HttpChannel implements HttpChannel { listener.onResponse(null); } else { final Throwable cause = f.cause(); - Netty4Utils.maybeDie(cause); + ExceptionsHelper.maybeDieOnAnotherThread(cause); if (cause instanceof Error) { listener.onFailure(new Exception(cause)); } else { diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpRequestHandler.java b/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpRequestHandler.java index ab078ad10d3..472e34d09fc 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpRequestHandler.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/http/netty4/Netty4HttpRequestHandler.java @@ -27,7 +27,6 @@ import io.netty.handler.codec.http.DefaultFullHttpRequest; import io.netty.handler.codec.http.FullHttpRequest; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.http.HttpPipelinedRequest; -import org.elasticsearch.transport.netty4.Netty4Utils; @ChannelHandler.Sharable class Netty4HttpRequestHandler extends SimpleChannelInboundHandler> { @@ -58,7 +57,7 @@ class Netty4HttpRequestHandler extends SimpleChannelInboundHandler maybeError = ExceptionsHelper.maybeError(cause, logger); - if (maybeError.isPresent()) { - /* - * Here be dragons. We want to rethrow this so that it bubbles up to the uncaught exception handler. Yet, Netty wraps too many - * invocations of user-code in try/catch blocks that swallow all throwables. This means that a rethrow here will not bubble up - * to where we want it to. So, we fork a thread and throw the exception from there where Netty can not get to it. We do not wrap - * the exception so as to not lose the original cause during exit. - */ - try { - // try to log the current stack trace - final String formatted = ExceptionsHelper.formatStackTrace(Thread.currentThread().getStackTrace()); - logger.error("fatal error on the network layer\n{}", formatted); - } finally { - new Thread( - () -> { - throw maybeError.get(); - }) - .start(); - } - } - } - } diff --git a/plugins/transport-nio/src/main/java/org/elasticsearch/http/nio/HttpReadWriteHandler.java b/plugins/transport-nio/src/main/java/org/elasticsearch/http/nio/HttpReadWriteHandler.java index 3dcd59cf8e2..17a5c1fb97e 100644 --- a/plugins/transport-nio/src/main/java/org/elasticsearch/http/nio/HttpReadWriteHandler.java +++ b/plugins/transport-nio/src/main/java/org/elasticsearch/http/nio/HttpReadWriteHandler.java @@ -139,7 +139,7 @@ public class HttpReadWriteHandler implements ReadWriteHandler { if (request.decoderResult().isFailure()) { Throwable cause = request.decoderResult().cause(); if (cause instanceof Error) { - ExceptionsHelper.dieOnError(cause); + ExceptionsHelper.maybeDieOnAnotherThread(cause); transport.incomingRequestError(httpRequest, nioHttpChannel, new Exception(cause)); } else { transport.incomingRequestError(httpRequest, nioHttpChannel, (Exception) cause); diff --git a/plugins/transport-nio/src/main/java/org/elasticsearch/http/nio/NettyAdaptor.java b/plugins/transport-nio/src/main/java/org/elasticsearch/http/nio/NettyAdaptor.java index 41cb72aa322..133206e1322 100644 --- a/plugins/transport-nio/src/main/java/org/elasticsearch/http/nio/NettyAdaptor.java +++ b/plugins/transport-nio/src/main/java/org/elasticsearch/http/nio/NettyAdaptor.java @@ -73,7 +73,7 @@ public class NettyAdaptor implements AutoCloseable { closeFuture.await(); if (closeFuture.isSuccess() == false) { Throwable cause = closeFuture.cause(); - ExceptionsHelper.dieOnError(cause); + ExceptionsHelper.maybeDieOnAnotherThread(cause); throw (Exception) cause; } } @@ -84,7 +84,7 @@ public class NettyAdaptor implements AutoCloseable { listener.accept(null, null); } else { final Throwable cause = f.cause(); - ExceptionsHelper.dieOnError(cause); + ExceptionsHelper.maybeDieOnAnotherThread(cause); assert cause instanceof Exception; listener.accept(null, (Exception) cause); } diff --git a/plugins/transport-nio/src/main/java/org/elasticsearch/http/nio/NettyListener.java b/plugins/transport-nio/src/main/java/org/elasticsearch/http/nio/NettyListener.java index 2cdaa4708d1..637bbafff8e 100644 --- a/plugins/transport-nio/src/main/java/org/elasticsearch/http/nio/NettyListener.java +++ b/plugins/transport-nio/src/main/java/org/elasticsearch/http/nio/NettyListener.java @@ -223,7 +223,7 @@ public class NettyListener implements BiConsumer, ChannelPromis biConsumer.accept(null, null); } else { if (cause instanceof Error) { - ExceptionsHelper.dieOnError(cause); + ExceptionsHelper.maybeDieOnAnotherThread(cause); biConsumer.accept(null, new Exception(cause)); } else { biConsumer.accept(null, (Exception) cause); diff --git a/qa/die-with-dignity/src/test/java/org/elasticsearch/qa/die_with_dignity/DieWithDignityIT.java b/qa/die-with-dignity/src/test/java/org/elasticsearch/qa/die_with_dignity/DieWithDignityIT.java index 992d3ce71f6..9250122025c 100644 --- a/qa/die-with-dignity/src/test/java/org/elasticsearch/qa/die_with_dignity/DieWithDignityIT.java +++ b/qa/die-with-dignity/src/test/java/org/elasticsearch/qa/die_with_dignity/DieWithDignityIT.java @@ -90,14 +90,14 @@ public class DieWithDignityIT extends ESRestTestCase { final Iterator it = lines.iterator(); - boolean fatalErrorOnTheNetworkLayer = false; + boolean fatalError = false; boolean fatalErrorInThreadExiting = false; - while (it.hasNext() && (fatalErrorOnTheNetworkLayer == false || fatalErrorInThreadExiting == false)) { + while (it.hasNext() && (fatalError == false || fatalErrorInThreadExiting == false)) { final String line = it.next(); - if (line.contains("fatal error on the network layer")) { - fatalErrorOnTheNetworkLayer = true; - } else if (line.matches(".*\\[ERROR\\]\\[o.e.b.ElasticsearchUncaughtExceptionHandler\\] \\[node-0\\]" + if (line.matches(".*\\[ERROR\\]\\[o\\.e\\.ExceptionsHelper\\s*\\] \\[node-0\\] fatal error")) { + fatalError = true; + } else if (line.matches(".*\\[ERROR\\]\\[o\\.e\\.b\\.ElasticsearchUncaughtExceptionHandler\\] \\[node-0\\]" + " fatal error in thread \\[Thread-\\d+\\], exiting$")) { fatalErrorInThreadExiting = true; assertTrue(it.hasNext()); @@ -105,7 +105,7 @@ public class DieWithDignityIT extends ESRestTestCase { } } - assertTrue(fatalErrorOnTheNetworkLayer); + assertTrue(fatalError); assertTrue(fatalErrorInThreadExiting); } diff --git a/server/src/main/java/org/elasticsearch/ExceptionsHelper.java b/server/src/main/java/org/elasticsearch/ExceptionsHelper.java index 09347f519fb..9f756666217 100644 --- a/server/src/main/java/org/elasticsearch/ExceptionsHelper.java +++ b/server/src/main/java/org/elasticsearch/ExceptionsHelper.java @@ -136,42 +136,6 @@ public final class ExceptionsHelper { return Arrays.stream(stackTrace).skip(1).map(e -> "\tat " + e).collect(Collectors.joining("\n")); } - static final int MAX_ITERATIONS = 1024; - - /** - * Unwrap the specified throwable looking for any suppressed errors or errors as a root cause of the specified throwable. - * - * @param cause the root throwable - * - * @return an optional error if one is found suppressed or a root cause in the tree rooted at the specified throwable - */ - public static Optional maybeError(final Throwable cause, final Logger logger) { - // early terminate if the cause is already an error - if (cause instanceof Error) { - return Optional.of((Error) cause); - } - - final Queue queue = new LinkedList<>(); - queue.add(cause); - int iterations = 0; - while (!queue.isEmpty()) { - iterations++; - if (iterations > MAX_ITERATIONS) { - logger.warn("giving up looking for fatal errors", cause); - break; - } - final Throwable current = queue.remove(); - if (current instanceof Error) { - return Optional.of((Error) current); - } - Collections.addAll(queue, current.getSuppressed()); - if (current.getCause() != null) { - queue.add(current.getCause()); - } - } - return Optional.empty(); - } - /** * Rethrows the first exception in the list and adds all remaining to the suppressed list. * If the given list is empty no exception is thrown @@ -243,13 +207,50 @@ public final class ExceptionsHelper { return true; } + static final int MAX_ITERATIONS = 1024; + + /** + * Unwrap the specified throwable looking for any suppressed errors or errors as a root cause of the specified throwable. + * + * @param cause the root throwable + * @return an optional error if one is found suppressed or a root cause in the tree rooted at the specified throwable + */ + public static Optional maybeError(final Throwable cause, final Logger logger) { + // early terminate if the cause is already an error + if (cause instanceof Error) { + return Optional.of((Error) cause); + } + + final Queue queue = new LinkedList<>(); + queue.add(cause); + int iterations = 0; + while (queue.isEmpty() == false) { + iterations++; + // this is a guard against deeply nested or circular chains of exceptions + if (iterations > MAX_ITERATIONS) { + logger.warn("giving up looking for fatal errors", cause); + break; + } + final Throwable current = queue.remove(); + if (current instanceof Error) { + return Optional.of((Error) current); + } + Collections.addAll(queue, current.getSuppressed()); + if (current.getCause() != null) { + queue.add(current.getCause()); + } + } + return Optional.empty(); + } + /** * If the specified cause is an unrecoverable error, this method will rethrow the cause on a separate thread so that it can not be - * caught and bubbles up to the uncaught exception handler. + * caught and bubbles up to the uncaught exception handler. Note that the cause tree is examined for any {@link Error}. See + * {@link #maybeError(Throwable, Logger)} for the semantics. * - * @param throwable the throwable to test + * @param throwable the throwable to possibly throw on another thread */ - public static void dieOnError(Throwable throwable) { + public static void maybeDieOnAnotherThread(final Throwable throwable) { ExceptionsHelper.maybeError(throwable, logger).ifPresent(error -> { /* * Here be dragons. We want to rethrow this so that it bubbles up to the uncaught exception handler. Yet, sometimes the stack diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/scheduler/SchedulerEngine.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/scheduler/SchedulerEngine.java index 3f99818f31a..30e41734906 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/scheduler/SchedulerEngine.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/scheduler/SchedulerEngine.java @@ -194,11 +194,11 @@ public class SchedulerEngine { /* * Allowing the throwable to escape here will lead to be it being caught in FutureTask#run and set as the outcome of this * task; however, we never inspect the the outcomes of these scheduled tasks and so allowing the throwable to escape - * unhandled here could lead to us losing fatal errors. Instead, we rely on ExceptionsHelper#dieOnError to appropriately - * dispatch any error to the uncaught exception handler. We should never see an exception here as these do not escape from - * SchedulerEngine#notifyListeners. + * unhandled here could lead to us losing fatal errors. Instead, we rely on ExceptionsHelper#maybeThrowErrorOnAnotherThread + * to appropriately dispatch any error to the uncaught exception handler. We should never see an exception here as these do + * not escape from SchedulerEngine#notifyListeners. */ - ExceptionsHelper.dieOnError(t); + ExceptionsHelper.maybeDieOnAnotherThread(t); throw t; } scheduleNextRun(triggeredTime);