ARTEMIS-2862 activation failure can cause zombie broker

In certain cases with shared-store HA a broker's activation can fail but
the broker will still be holding the journal lock. This results in a
"zombie" broker which can't actually service clients and prevents the
backup from activating.

This commit adds an ActivationFailureListener to catch activation
failures and stop the broker completely.
This commit is contained in:
Justin Bertram 2020-07-31 14:27:23 -05:00 committed by Clebert Suconic
parent 02b8135e8d
commit af7c6882da
3 changed files with 38 additions and 16 deletions

View File

@ -19,6 +19,7 @@ package org.apache.activemq.artemis.cli.commands;
import java.io.File;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.atomic.AtomicBoolean;
import io.airlift.airline.Command;
import io.airlift.airline.Option;
@ -50,6 +51,8 @@ public class Run extends LockAbstract {
private ManagementContext managementContext;
private Timer shutdownTimer;
/**
* This will disable the System.exit at the end of the server.stop, as that means there are other things
* happening on the same VM.
@ -81,6 +84,9 @@ public class Run extends LockAbstract {
server = BrokerFactory.createServer(broker.server, security);
managementContext.start();
server.createComponents();
AtomicBoolean serverActivationFailed = new AtomicBoolean(false);
server.getServer().registerActivationFailureListener(exception -> serverActivationFailed.set(true));
server.start();
server.getServer().addExternalComponent(managementContext);
@ -92,8 +98,12 @@ public class Run extends LockAbstract {
Class clazz = this.getClass().getClassLoader().loadClass(componentDTO.componentClassName);
ExternalComponent component = (ExternalComponent) clazz.newInstance();
component.configure(componentDTO, getBrokerInstance(), getBrokerHome());
component.start();
server.getServer().addExternalComponent(component);
component.start();
}
if (serverActivationFailed.get()) {
stop();
}
} catch (Throwable t) {
t.printStackTrace();
@ -123,8 +133,8 @@ public class Run extends LockAbstract {
}
}
final Timer timer = new Timer("ActiveMQ Artemis Server Shutdown Timer", true);
timer.scheduleAtFixedRate(new TimerTask() {
shutdownTimer = new Timer("ActiveMQ Artemis Server Shutdown Timer", true);
shutdownTimer.scheduleAtFixedRate(new TimerTask() {
@Override
public void run() {
if (allowKill && fileKill.exists()) {
@ -138,7 +148,7 @@ public class Run extends LockAbstract {
if (file.exists()) {
try {
stop();
timer.cancel();
shutdownTimer.cancel();
} finally {
System.out.println("Server stopped!");
System.out.flush();
@ -168,6 +178,9 @@ public class Run extends LockAbstract {
if (managementContext != null) {
managementContext.stop();
}
if (shutdownTimer != null) {
shutdownTimer.cancel();
}
} catch (Exception e) {
e.printStackTrace();
}

View File

@ -24,5 +24,7 @@ import org.apache.activemq.artemis.core.server.ServiceComponent;
*/
public interface Broker extends ServiceComponent {
/**
 * Creates the components that make up this broker.
 * <p>
 * NOTE(review): presumably intended to be invoked before the broker is started, so that callers can
 * reach the server (e.g. to register listeners) ahead of start-up -- confirm against implementations.
 *
 * @throws Exception if the components cannot be created
 */
void createComponents() throws Exception;
/**
 * @return the {@link ActiveMQServer} associated with this broker
 */
ActiveMQServer getServer();
}

View File

@ -51,18 +51,9 @@ public class FileBroker implements Broker {
return;
}
//todo if we start to pullout more configs from the main config then we should pull out the configuration objects from factories if available
FileConfiguration configuration = new FileConfiguration();
LegacyJMSConfiguration legacyJMSConfiguration = new LegacyJMSConfiguration(configuration);
FileDeploymentManager fileDeploymentManager = new FileDeploymentManager(configurationUrl);
fileDeploymentManager.addDeployable(configuration).addDeployable(legacyJMSConfiguration);
fileDeploymentManager.readConfiguration();
createDirectories(configuration);
components = fileDeploymentManager.buildService(securityManager, ManagementFactory.getPlatformMBeanServer());
if (components == null) {
createComponents();
}
ArrayList<ActiveMQComponent> componentsByStartOrder = getComponentsByStartOrder(components);
ActiveMQBootstrapLogger.LOGGER.serverStarting();
@ -112,6 +103,22 @@ public class FileBroker implements Broker {
return components;
}
/**
 * Reads the broker configuration located at {@code configurationUrl} and builds this broker's
 * components from it, storing the result in {@code components}.
 *
 * @throws Exception if reading the configuration or building the components fails
 */
@Override
public void createComponents() throws Exception {
   // TODO: if more configs get pulled out of the main config, the configuration objects should be
   // obtained from factories where available
   final FileConfiguration coreConfig = new FileConfiguration();
   final LegacyJMSConfiguration legacyJmsConfig = new LegacyJMSConfiguration(coreConfig);

   // both deployables are registered before the configuration is read
   final FileDeploymentManager deployer = new FileDeploymentManager(configurationUrl);
   deployer.addDeployable(coreConfig).addDeployable(legacyJmsConfig);
   deployer.readConfiguration();

   createDirectories(coreConfig);

   components = deployer.buildService(securityManager, ManagementFactory.getPlatformMBeanServer());
}
/*
* this makes sure the components are started in the correct order. It's simple at the moment as we only have core and jms but
* will need improving if we get more.