ARTEMIS-4571 race condition w/TTL impacting in-vm connections
There is a race condition between ConnectionEntry.ttl and FailureCheckAndFlushThread whereby an in-vm connection may get closed inadvertently due to a TTL timeout. This is because ConnectionEntry.ttl is initialized to 60000 and then later set to -1 upon the initial Ping. If this update happens at *just* the right time in FailureCheckAndFlushThread then the connection will be closed. The fix ensures that ConnectionEntry.ttl is set to -1 for in-vm connections from the start. It also eliminates the possibility of the race in FailureCheckAndFlushThread by reading ConnectionEntry.ttl and ConnectionEntry.lastCheck once into local variables and using those values for the whole check. This fix is based on static analysis of the code. The timing window is just too small to construct a reliable test. The failure has only been seen in the wild a handful of times.
This commit is contained in:
parent
99348ee672
commit
1197898232
|
@ -62,6 +62,7 @@ import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.SubscribeC
|
|||
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.SubscribeClusterTopologyUpdatesMessageV2;
|
||||
import org.apache.activemq.artemis.core.remoting.CloseListener;
|
||||
import org.apache.activemq.artemis.core.remoting.FailureListener;
|
||||
import org.apache.activemq.artemis.core.remoting.impl.invm.InVMConnection;
|
||||
import org.apache.activemq.artemis.core.remoting.impl.netty.ActiveMQFrameDecoder2;
|
||||
import org.apache.activemq.artemis.core.remoting.impl.netty.NettyServerConnection;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServer;
|
||||
|
@ -146,7 +147,7 @@ public class CoreProtocolManager implements ProtocolManager<Interceptor, ActiveM
|
|||
|
||||
channel1.setHandler(handler);
|
||||
|
||||
long ttl = ActiveMQClient.DEFAULT_CONNECTION_TTL;
|
||||
long ttl = connection instanceof InVMConnection ? ActiveMQClient.DEFAULT_CONNECTION_TTL_INVM : ActiveMQClient.DEFAULT_CONNECTION_TTL;
|
||||
|
||||
if (config.getConnectionTTLOverride() != -1) {
|
||||
ttl = config.getConnectionTTLOverride();
|
||||
|
|
|
@ -762,19 +762,19 @@ public class RemotingServiceImpl implements RemotingService, ServerConnectionLif
|
|||
public void run() {
|
||||
while (!closed) {
|
||||
try {
|
||||
long now = System.currentTimeMillis();
|
||||
|
||||
Set<Pair<Object, Long>> toRemove = new HashSet<>();
|
||||
|
||||
for (ConnectionEntry entry : connections.values()) {
|
||||
final RemotingConnection conn = entry.connection;
|
||||
final long lastCheck = entry.lastCheck;
|
||||
final long ttl = entry.ttl;
|
||||
final long now = System.currentTimeMillis();
|
||||
|
||||
boolean flush = true;
|
||||
|
||||
if (entry.ttl != -1) {
|
||||
if (ttl != -1) {
|
||||
if (!conn.checkDataReceived()) {
|
||||
if (now >= entry.lastCheck + entry.ttl) {
|
||||
toRemove.add(new Pair<>(conn.getID(), entry.ttl));
|
||||
if (now >= lastCheck + ttl) {
|
||||
toRemove.add(new Pair<>(conn.getID(), ttl));
|
||||
|
||||
flush = false;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue