Suppress lease background sync failures if stopping (#40902)

If the transport service is stopped, likely because we are shutting
down, and a retention lease background sync fires the logs will display
a warn message and stacktrace. Yet, this situaton is harmless and can
happen as a normal course of business when shutting down. This commit
suppresses the log messages in this case.
This commit is contained in:
Jason Tedor 2019-04-06 10:17:29 -04:00
parent a69ff8221f
commit e44e84ab42
No known key found for this signature in database
GPG Key ID: FA89F05560F16BC5
5 changed files with 34 additions and 10 deletions

View File

@ -29,6 +29,7 @@ import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
import org.elasticsearch.index.Index;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.transport.TransportException;
import java.io.IOException;
import java.io.PrintWriter;
@ -193,6 +194,14 @@ public final class ExceptionsHelper {
return null;
}
public static boolean isTransportStoppedForAction(final Throwable t, final String action) {
final TransportException maybeTransport =
(TransportException) ExceptionsHelper.unwrap(t, TransportException.class);
return maybeTransport != null
&& (maybeTransport.getMessage().equals("TransportService is closed stopped can't send request")
|| maybeTransport.getMessage().equals("transport stopped, action: " + action));
}
/**
* Throws the specified exception. If null if specified then <code>true</code> is returned.
*/

View File

@ -38,7 +38,6 @@ import org.elasticsearch.index.shard.ReplicationGroup;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.node.NodeClosedException;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.transport.TransportException;
import java.io.IOException;
import java.util.ArrayList;
@ -205,10 +204,9 @@ public class ReplicationOperation<
private void onNoLongerPrimary(Exception failure) {
final Throwable cause = ExceptionsHelper.unwrapCause(failure);
final boolean nodeIsClosing = cause instanceof NodeClosedException
|| (cause instanceof TransportException &&
("TransportService is closed stopped can't send request".equals(cause.getMessage())
|| "transport stopped, action: internal:cluster/shard/failure".equals(cause.getMessage())));
final boolean nodeIsClosing =
cause instanceof NodeClosedException
|| ExceptionsHelper.isTransportStoppedForAction(cause, "internal:cluster/shard/failure");
final String message;
if (nodeIsClosing) {
message = String.format(Locale.ROOT,

View File

@ -113,9 +113,15 @@ public class RetentionLeaseBackgroundSyncAction extends TransportReplicationActi
ActionListener.wrap(
r -> {},
e -> {
if (ExceptionsHelper.unwrap(e, AlreadyClosedException.class, IndexShardClosedException.class) == null) {
getLogger().warn(new ParameterizedMessage("{} retention lease background sync failed", shardId), e);
if (ExceptionsHelper.isTransportStoppedForAction(e, ACTION_NAME + "[p]")) {
// we are likely shutting down
return;
}
if (ExceptionsHelper.unwrap(e, AlreadyClosedException.class, IndexShardClosedException.class) != null) {
// the shard is closed
return;
}
getLogger().warn(new ParameterizedMessage("{} retention lease background sync failed", shardId), e);
}));
}
}

View File

@ -274,6 +274,7 @@ public class TransportService extends AbstractLifecycleComponent implements Tran
}
@Override
public void doRun() {
// cf. ExceptionsHelper#isTransportStoppedForAction
TransportException ex = new TransportException("transport stopped, action: " + holderToNotify.action());
holderToNotify.handler().handleException(ex);
}
@ -626,8 +627,13 @@ public class TransportService extends AbstractLifecycleComponent implements Tran
}
try {
if (lifecycle.stoppedOrClosed()) {
// if we are not started the exception handling will remove the RequestHolder again and calls the handler to notify
// the caller. It will only notify if the toStop code hasn't done the work yet.
/*
* If we are not started the exception handling will remove the request holder again and calls the handler to notify the
* caller. It will only notify if toStop hasn't done the work yet.
*
* Do not edit this exception message, it is currently relied upon in production code!
*/
// TODO: make a dedicated exception for a stopped transport service? cf. ExceptionsHelper#isTransportStoppedForAction
throw new TransportException("TransportService is closed stopped can't send request");
}
if (timeoutHandler != null) {

View File

@ -42,6 +42,7 @@ import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.transport.CapturingTransport;
import org.elasticsearch.threadpool.TestThreadPool;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportException;
import org.elasticsearch.transport.TransportService;
import org.mockito.ArgumentCaptor;
@ -204,9 +205,13 @@ public class RetentionLeaseBackgroundSyncActionTests extends ESTestCase {
final Exception e = randomFrom(
new AlreadyClosedException("closed"),
new IndexShardClosedException(indexShard.shardId()),
new TransportException(randomFrom(
"failed",
"TransportService is closed stopped can't send request",
"transport stopped, action: indices:admin/seq_no/retention_lease_background_sync[p]")),
new RuntimeException("failed"));
listener.onFailure(e);
if (e instanceof AlreadyClosedException == false && e instanceof IndexShardClosedException == false) {
if (e.getMessage().equals("failed")) {
final ArgumentCaptor<ParameterizedMessage> captor = ArgumentCaptor.forClass(ParameterizedMessage.class);
verify(retentionLeaseSyncActionLogger).warn(captor.capture(), same(e));
final ParameterizedMessage message = captor.getValue();