ARTEMIS-4571 race condition w/TTL impacting in-vm connections

There is a race condition between ConnectionEntry.ttl and
FailureCheckAndFlushThread whereby an in-vm connection may get closed
inadvertently due to a TTL timeout. This is because ConnectionEntry.ttl
is initialized to 60000 and then later set to -1 upon the initial Ping.
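If this update happens at *just* the right time in
FailureCheckAndFlushThread (after the thread has checked that the ttl is
not -1, but before it evaluates now >= lastCheck + ttl) then the timeout
comparison is made with a ttl of -1 and the connection will be closed.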

The fix ensures that the ConnectionEntry.ttl is set to -1 for in-vm
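connections from the start. It also eliminates the possibility of the
race in FailureCheckAndFlushThread by reading the entry's ttl and
lastCheck into local variables once per connection instead of re-reading
the fields.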

This fix is based on static analysis of the code. The timing window is
just too small to construct a reliable test. The failure has only been
seen in the wild a handful of times.
This commit is contained in:
Justin Bertram 2024-01-17 15:00:15 -06:00 committed by clebertsuconic
parent 99348ee672
commit 1197898232
2 changed files with 8 additions and 7 deletions

View File

@ -62,6 +62,7 @@ import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.SubscribeC
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.SubscribeClusterTopologyUpdatesMessageV2;
import org.apache.activemq.artemis.core.remoting.CloseListener;
import org.apache.activemq.artemis.core.remoting.FailureListener;
import org.apache.activemq.artemis.core.remoting.impl.invm.InVMConnection;
import org.apache.activemq.artemis.core.remoting.impl.netty.ActiveMQFrameDecoder2;
import org.apache.activemq.artemis.core.remoting.impl.netty.NettyServerConnection;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
@ -146,7 +147,7 @@ public class CoreProtocolManager implements ProtocolManager<Interceptor, ActiveM
channel1.setHandler(handler);
long ttl = ActiveMQClient.DEFAULT_CONNECTION_TTL;
long ttl = connection instanceof InVMConnection ? ActiveMQClient.DEFAULT_CONNECTION_TTL_INVM : ActiveMQClient.DEFAULT_CONNECTION_TTL;
if (config.getConnectionTTLOverride() != -1) {
ttl = config.getConnectionTTLOverride();

View File

@ -762,19 +762,19 @@ public class RemotingServiceImpl implements RemotingService, ServerConnectionLif
public void run() {
while (!closed) {
try {
long now = System.currentTimeMillis();
Set<Pair<Object, Long>> toRemove = new HashSet<>();
for (ConnectionEntry entry : connections.values()) {
final RemotingConnection conn = entry.connection;
final long lastCheck = entry.lastCheck;
final long ttl = entry.ttl;
final long now = System.currentTimeMillis();
boolean flush = true;
if (entry.ttl != -1) {
if (ttl != -1) {
if (!conn.checkDataReceived()) {
if (now >= entry.lastCheck + entry.ttl) {
toRemove.add(new Pair<>(conn.getID(), entry.ttl));
if (now >= lastCheck + ttl) {
toRemove.add(new Pair<>(conn.getID(), ttl));
flush = false;
}