ARTEMIS-3227 Web Console could be shut down after certain failures

This commit is contained in:
Clebert Suconic 2021-04-08 11:47:13 -04:00
parent c56912c3d5
commit faf32fe550
6 changed files with 101 additions and 25 deletions

View File

@ -561,19 +561,24 @@ public class ActiveMQServerImpl implements ActiveMQServer {
ActiveMQServerLogger.LOGGER.failedToStartServer(t);
} finally {
if (originalState == SERVER_STATE.STOPPED) {
networkHealthCheck.setTimeUnit(TimeUnit.MILLISECONDS).setPeriod(configuration.getNetworkCheckPeriod()).
setNetworkTimeout(configuration.getNetworkCheckTimeout()).
parseAddressList(configuration.getNetworkCheckList()).
parseURIList(configuration.getNetworkCheckURLList()).
setNICName(configuration.getNetworkCheckNIC()).
setIpv4Command(configuration.getNetworkCheckPingCommand()).
setIpv6Command(configuration.getNetworkCheckPing6Command());
reloadNetworkHealthCheck();
networkHealthCheck.addComponent(networkCheckMonitor);
}
}
}
/**
 * Copies the network-check settings from {@code configuration} onto
 * {@code networkHealthCheck} and then adds {@code networkCheckMonitor} to it as a component.
 *
 * <p>The time unit is set to milliseconds first; after that the period, network timeout,
 * address list, URL list, NIC name and the IPv4 / IPv6 ping commands are applied in that
 * order, each read from the corresponding {@code configuration} getter.
 */
public void reloadNetworkHealthCheck() {
   // One fluent chain: every call operates on whatever the previous setter returned.
   networkHealthCheck
      .setTimeUnit(TimeUnit.MILLISECONDS)
      .setPeriod(configuration.getNetworkCheckPeriod())
      .setNetworkTimeout(configuration.getNetworkCheckTimeout())
      .parseAddressList(configuration.getNetworkCheckList())
      .parseURIList(configuration.getNetworkCheckURLList())
      .setNICName(configuration.getNetworkCheckNIC())
      .setIpv4Command(configuration.getNetworkCheckPingCommand())
      .setIpv6Command(configuration.getNetworkCheckPing6Command());

   // Registered only after the settings above have been applied.
   networkHealthCheck.addComponent(networkCheckMonitor);
}
@Override
public CriticalAnalyzer getCriticalAnalyzer() {
return this.analyzer;

View File

@ -265,10 +265,13 @@ public final class SharedNothingBackupActivation extends Activation {
}
if (activeMQServer.getState() != ActiveMQServer.SERVER_STATE.STOPPED &&
activeMQServer.getState() != ActiveMQServer.SERVER_STATE.STOPPING) {
activeMQServer.stop();
if (signalToStop == SharedNothingBackupQuorum.BACKUP_ACTIVATION.FAILURE_RETRY) {
activeMQServer.stop(false);
logger.trace("The server was shutdown for a network isolation, we keep retrying");
activeMQServer.start();
} else {
activeMQServer.stop();
}
}
} catch (Exception e) {

View File

@ -45,9 +45,11 @@ public class BackupAuthenticationTest extends FailoverTestBase {
@Test
public void testWrongPasswordSetting() throws Exception {
FakeServiceComponent fakeServiceComponent = new FakeServiceComponent("fake web server");
Wait.assertTrue(liveServer.getServer()::isActive);
waitForServerToStart(liveServer.getServer());
backupServer.start();
backupServer.getServer().addExternalComponent(fakeServiceComponent, true);
assertTrue(latch.await(5, TimeUnit.SECONDS));
/*
* can't intercept the message at the backup, so we intercept the registration message at the
@ -55,6 +57,7 @@ public class BackupAuthenticationTest extends FailoverTestBase {
*/
Wait.waitFor(() -> !backupServer.isStarted());
assertFalse("backup should have stopped", backupServer.isStarted());
Wait.assertFalse(fakeServiceComponent::isStarted);
backupServer.stop();
liveServer.stop();
}

View File

@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover;
import org.apache.activemq.artemis.core.server.ServiceComponent;
/**
 * Used by tests that are simulating a WebServer that should or should not go down.
 *
 * <p>The component only tracks a {@code started} flag: {@link #start()} sets it,
 * {@link #stop(boolean)} clears it only when {@code shutdown} is {@code true}, and
 * {@link #stop()} is equivalent to {@code stop(true)}.
 */
public class FakeServiceComponent implements ServiceComponent {

   /** Text returned by {@link #toString()}, so assertion messages can identify the component. */
   final String description;

   /**
    * Whether the component is currently considered started.
    *
    * <p>Declared {@code volatile} because the flag is read by test code polling
    * {@link #isStarted()} while it may be written by a different thread that starts or
    * stops the component; without it the polling thread has no visibility guarantee.
    */
   volatile boolean started = false;

   /**
    * @param description label returned by {@link #toString()}
    */
   public FakeServiceComponent(String description) {
      this.description = description;
   }

   @Override
   public String toString() {
      return description;
   }

   /** Marks the component as started. */
   @Override
   public void start() throws Exception {
      started = true;
   }

   /** Stops the component as a full shutdown, i.e. {@code stop(true)}. */
   @Override
   public void stop() throws Exception {
      stop(true);
   }

   /**
    * @return {@code true} after {@link #start()} and until a stop with {@code shutdown == true}
    */
   @Override
   public boolean isStarted() {
      return started;
   }

   /**
    * Clears the started flag only for a real shutdown.
    *
    * @param shutdown when {@code false} the call is a no-op and the component stays started
    */
   @Override
   public void stop(boolean shutdown) throws Exception {
      if (shutdown) {
         started = false;
      }
   }
}

View File

@ -24,6 +24,7 @@ import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.api.core.client.ClientSession;
import org.apache.activemq.artemis.api.core.client.ClientSessionFactory;
import org.apache.activemq.artemis.api.core.client.ServerLocator;
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.logs.AssertionLoggerHandler;
import org.apache.activemq.artemis.tests.util.TransportConfigurationUtils;
import org.apache.activemq.artemis.tests.util.Wait;
@ -72,7 +73,7 @@ public class NetworkIsolationTest extends FailoverTestBase {
liveServer.getServer().getNetworkHealthCheck().addAddress(badAddress);
Assert.assertTrue(Wait.waitFor(() -> !liveServer.isStarted()));
Wait.assertFalse(liveServer::isStarted);
liveServer.getServer().getNetworkHealthCheck().clearAddresses();
@ -130,7 +131,7 @@ public class NetworkIsolationTest extends FailoverTestBase {
backupServer.getServer().getNetworkHealthCheck().clearAddresses();
// This will make sure the backup got synchronized after the network was activated again
Assert.assertTrue(backupServer.getServer().getReplicationEndpoint().isStarted());
Wait.assertTrue(() -> backupServer.getServer().getReplicationEndpoint().isStarted());
} finally {
AssertionLoggerHandler.stopCapture();
}
@ -140,35 +141,28 @@ public class NetworkIsolationTest extends FailoverTestBase {
public void testLiveIsolated() throws Exception {
backupServer.stop();
liveServer.stop();
FakeServiceComponent component = new FakeServiceComponent("Component for " + getName());
liveServer.getServer().addExternalComponent(component, true);
liveServer.getServer().getConfiguration().setNetworkCheckList(badAddress).
setNetworkCheckPeriod(100).setNetworkCheckTimeout(100);
((ActiveMQServerImpl)liveServer.getServer()).reloadNetworkHealthCheck();
try {
liveServer.start();
Assert.assertEquals(100L, liveServer.getServer().getNetworkHealthCheck().getPeriod());
liveServer.getServer().getNetworkHealthCheck().setTimeUnit(TimeUnit.MILLISECONDS);
Assert.assertFalse(liveServer.getServer().getNetworkHealthCheck().check());
long timeout = System.currentTimeMillis() + 30000;
while (liveServer.isStarted() && System.currentTimeMillis() < timeout) {
Thread.sleep(100);
}
Assert.assertFalse(liveServer.isStarted());
Wait.assertFalse(liveServer::isStarted);
liveServer.getServer().getNetworkHealthCheck().setIgnoreLoopback(true).addAddress("127.0.0.1");
timeout = System.currentTimeMillis() + 30000;
while (!liveServer.isStarted() && System.currentTimeMillis() < timeout) {
Thread.sleep(100);
}
Wait.assertTrue(liveServer::isStarted);
Assert.assertTrue(liveServer.isStarted());
Assert.assertTrue(component.isStarted());
} catch (Throwable e) {
logger.warn(e.getMessage(), e);
throw e;

View File

@ -27,9 +27,11 @@ import org.apache.activemq.artemis.api.core.client.TopologyMember;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
import org.apache.activemq.artemis.core.protocol.core.impl.PacketImpl;
import org.apache.activemq.artemis.core.server.ActiveMQComponent;
import org.apache.activemq.artemis.core.server.impl.SharedNothingLiveActivation;
import org.apache.activemq.artemis.tests.integration.cluster.util.BackupSyncDelay;
import org.apache.activemq.artemis.utils.RetryRule;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
@ -113,6 +115,12 @@ public class QuorumFailOverTest extends StaticClusterWithBackupFailoverTest {
new BackupSyncDelay(servers[4], servers[1], PacketImpl.REPLICATION_SCHEDULED_FAILOVER);
startServers(3, 4, 5);
ActiveMQComponent[] externalComponents = new ActiveMQComponent[6];
for (int i = 0; i < 6; i++) {
externalComponents[i] = new FakeServiceComponent("server " + i);
servers[i].addExternalComponent(externalComponents[i], true);
}
for (int i : liveServerIDs) {
waitForTopology(servers[i], 3, 3);
}
@ -146,6 +154,10 @@ public class QuorumFailOverTest extends StaticClusterWithBackupFailoverTest {
assertFalse(servers[0].isReplicaSync());
waitForRemoteBackupSynchronization(servers[0]);
assertTrue(servers[0].isReplicaSync());
for (ActiveMQComponent component : externalComponents) {
Assert.assertTrue("component " + component + " is stopped, the web server would been stopped here", component.isStarted());
}
}
@Override