Avoid blocking on channel close on network thread (#25521)

Currently when we close a channel in Netty4Utils.closeChannels we
block until the closing is complete. This introduces the possibility
that a network selector thread will block while waiting until a
separate network selector thread closes a channel.

For instance: T1 closes channel 1 (which is assigned to a T1 selector).
Channel 1's close listener executes the closing of the node. That
means that T1 now tries to close channel 2. However, channel 2 is
assigned to a selector that is running on T2. T1 now must wait until T2
closes that channel at some point in the future.

This commit addresses this by adding a boolean to closeChannels
indicating if we should block on close. We only set this boolean to true
if we are closing down the server channels at shutdown. This call is
never made from a network thread. When we call the closeChannels method
with that boolean set to false, we do not block on close.
This commit is contained in:
Tim Brooks 2017-07-10 10:50:51 -05:00 committed by GitHub
parent 8cf0528001
commit b22bbf94da
5 changed files with 56 additions and 66 deletions

View File

@ -442,7 +442,7 @@ public abstract class TcpTransport<Channel> extends AbstractLifecycleComponent i
public void close() throws IOException {
if (closed.compareAndSet(false, true)) {
try {
closeChannels(Arrays.stream(channels).filter(Objects::nonNull).collect(Collectors.toList()));
closeChannels(Arrays.stream(channels).filter(Objects::nonNull).collect(Collectors.toList()), false);
} finally {
transportServiceAdapter.onConnectionClosed(this);
}
@ -640,7 +640,7 @@ public abstract class TcpTransport<Channel> extends AbstractLifecycleComponent i
protected final void closeChannelWhileHandlingExceptions(final Channel channel) {
if (isOpen(channel)) {
try {
closeChannels(Collections.singletonList(channel));
closeChannels(Collections.singletonList(channel), false);
} catch (IOException e) {
logger.warn("failed to close channel", e);
}
@ -902,7 +902,7 @@ public abstract class TcpTransport<Channel> extends AbstractLifecycleComponent i
// first stop to accept any incoming connections so nobody can connect to this transport
for (Map.Entry<String, List<Channel>> entry : serverChannels.entrySet()) {
try {
closeChannels(entry.getValue());
closeChannels(entry.getValue(), true);
} catch (Exception e) {
logger.debug(
(Supplier<?>) () -> new ParameterizedMessage(
@ -975,7 +975,7 @@ public abstract class TcpTransport<Channel> extends AbstractLifecycleComponent i
@Override
protected void innerInnerOnResponse(Channel channel) {
try {
closeChannels(Collections.singletonList(channel));
closeChannels(Collections.singletonList(channel), false);
} catch (IOException e1) {
logger.debug("failed to close httpOnTransport channel", e1);
}
@ -984,7 +984,7 @@ public abstract class TcpTransport<Channel> extends AbstractLifecycleComponent i
@Override
protected void innerOnFailure(Exception e) {
try {
closeChannels(Collections.singletonList(channel));
closeChannels(Collections.singletonList(channel), false);
} catch (IOException e1) {
e.addSuppressed(e1);
logger.debug("failed to close httpOnTransport channel", e1);
@ -1015,9 +1015,14 @@ public abstract class TcpTransport<Channel> extends AbstractLifecycleComponent i
protected abstract Channel bind(String name, InetSocketAddress address) throws IOException;
/**
* Closes all channels in this list
* Closes all channels in this list. If the blocking boolean is set to true, the channels must be
* closed before the method returns. This should never be called with blocking set to true from a
* network thread.
*
* @param channels the channels to close
* @param blocking whether the channels should be closed synchronously
*/
protected abstract void closeChannels(List<Channel> channel) throws IOException;
protected abstract void closeChannels(List<Channel> channels, boolean blocking) throws IOException;
/**
* Sends message to channel. The listener's onResponse method will be called when the send is complete unless an exception

View File

@ -191,7 +191,7 @@ public class TCPTransportTests extends ESTestCase {
}
@Override
protected void closeChannels(List channel) throws IOException {
protected void closeChannels(List channel, boolean blocking) throws IOException {
}

View File

@ -70,7 +70,6 @@ import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
@ -341,8 +340,21 @@ public class Netty4Transport extends TcpTransport<Channel> {
}
@Override
protected void closeChannels(final List<Channel> channels) throws IOException {
protected void closeChannels(final List<Channel> channels, boolean blocking) throws IOException {
if (blocking) {
Netty4Utils.closeChannels(channels);
} else {
for (Channel channel : channels) {
if (channel != null && channel.isOpen()) {
ChannelFuture closeFuture = channel.close();
closeFuture.addListener((f) -> {
if (f.isSuccess() == false) {
logger.warn("failed to close channel", f.cause());
}
});
}
}
}
}
@Override

View File

@ -53,7 +53,6 @@ import java.net.SocketTimeoutException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
@ -243,7 +242,7 @@ public class MockTcpTransport extends TcpTransport<MockTcpTransport.MockChannel>
}
@Override
protected void closeChannels(List<MockChannel> channel) throws IOException {
protected void closeChannels(List<MockChannel> channel, boolean blocking) throws IOException {
IOUtils.close(channel);
}

View File

@ -37,6 +37,7 @@ import org.elasticsearch.transport.ConnectionProfile;
import org.elasticsearch.transport.TcpTransport;
import org.elasticsearch.transport.Transports;
import org.elasticsearch.transport.nio.channel.ChannelFactory;
import org.elasticsearch.transport.nio.channel.CloseFuture;
import org.elasticsearch.transport.nio.channel.NioChannel;
import org.elasticsearch.transport.nio.channel.NioServerSocketChannel;
import org.elasticsearch.transport.nio.channel.NioSocketChannel;
@ -45,7 +46,6 @@ import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ThreadFactory;
import java.util.function.Consumer;
@ -100,28 +100,29 @@ public class NioTransport extends TcpTransport<NioChannel> {
}
@Override
protected void closeChannels(List<NioChannel> channels) throws IOException {
IOException closingExceptions = null;
protected void closeChannels(List<NioChannel> channels, boolean blocking) throws IOException {
ArrayList<CloseFuture> futures = new ArrayList<>(channels.size());
for (final NioChannel channel : channels) {
if (channel != null && channel.isOpen()) {
try {
// If we are currently on the selector thread that handles this channel, we should prefer
// the closeFromSelector method. This method always closes the channel immediately.
ESSelector selector = channel.getSelector();
if (selector != null && selector.isOnCurrentThread()) {
channel.closeFromSelector();
} else {
channel.closeAsync().awaitClose();
}
} catch (Exception e) {
if (closingExceptions == null) {
closingExceptions = new IOException("failed to close channels");
}
closingExceptions.addSuppressed(e.getCause());
futures.add(channel.closeAsync());
}
}
if (blocking == false) {
return;
}
IOException closingExceptions = null;
for (CloseFuture future : futures) {
try {
future.awaitClose();
IOException closeException = future.getCloseException();
if (closeException != null) {
closingExceptions = addClosingException(closingExceptions, closeException);
}
} catch (InterruptedException e) {
closingExceptions = addClosingException(closingExceptions, e);
}
}
if (closingExceptions != null) {
throw closingExceptions;
}
@ -226,47 +227,20 @@ public class NioTransport extends TcpTransport<NioChannel> {
onException(channel, t instanceof Exception ? (Exception) t : new ElasticsearchException(t));
}
private Settings createFallbackSettings() {
Settings.Builder fallbackSettingsBuilder = Settings.builder();
List<String> fallbackBindHost = TcpTransport.BIND_HOST.get(settings);
if (fallbackBindHost.isEmpty() == false) {
fallbackSettingsBuilder.putArray("bind_host", fallbackBindHost);
}
List<String> fallbackPublishHost = TcpTransport.PUBLISH_HOST.get(settings);
if (fallbackPublishHost.isEmpty() == false) {
fallbackSettingsBuilder.putArray("publish_host", fallbackPublishHost);
}
boolean fallbackTcpNoDelay = TcpTransport.TCP_NO_DELAY.get(settings);
fallbackSettingsBuilder.put("tcp_no_delay", fallbackTcpNoDelay);
boolean fallbackTcpKeepAlive = TcpTransport.TCP_KEEP_ALIVE.get(settings);
fallbackSettingsBuilder.put("tcp_keep_alive", fallbackTcpKeepAlive);
boolean fallbackReuseAddress = TcpTransport.TCP_REUSE_ADDRESS.get(settings);;
fallbackSettingsBuilder.put("reuse_address", fallbackReuseAddress);
ByteSizeValue fallbackTcpSendBufferSize = TcpTransport.TCP_SEND_BUFFER_SIZE.get(settings);
if (fallbackTcpSendBufferSize.getBytes() >= 0) {
fallbackSettingsBuilder.put("tcp_send_buffer_size", fallbackTcpSendBufferSize);
}
ByteSizeValue fallbackTcpBufferSize = TcpTransport.TCP_RECEIVE_BUFFER_SIZE.get(settings);;
if (fallbackTcpBufferSize.getBytes() >= 0) {
fallbackSettingsBuilder.put("tcp_receive_buffer_size", fallbackTcpBufferSize);
}
return fallbackSettingsBuilder.build();
}
private NioClient createClient() {
Supplier<SocketSelector> selectorSupplier = new RoundRobinSelectorSupplier(socketSelectors);
ChannelFactory channelFactory = new ChannelFactory(new ProfileSettings(settings, "default"), tcpReadHandler);
return new NioClient(logger, openChannels, selectorSupplier, defaultConnectionProfile.getConnectTimeout(), channelFactory);
}
private IOException addClosingException(IOException closingExceptions, Exception e) {
if (closingExceptions == null) {
closingExceptions = new IOException("failed to close channels");
}
closingExceptions.addSuppressed(e);
return closingExceptions;
}
class ClientChannelCloseListener implements Consumer<NioChannel> {
private final Consumer<NioChannel> consumer;