ARTEMIS-3340 Sequential activation tracking for pluggable quorum replication policies + peer

Co-authored-by: franz1981 <nigro.fra@gmail.com>

parent 536271485f
commit ca7a100de0
@@ -19,7 +19,7 @@ package org.apache.activemq.artemis.cli.commands;
 import java.io.File;
 import java.util.Timer;
 import java.util.TimerTask;
-import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicReference;
 
 import io.airlift.airline.Command;
 import io.airlift.airline.Option;
@@ -71,6 +71,7 @@ public class Run extends LockAbstract {
    public Object execute(ActionContext context) throws Exception {
       super.execute(context);
 
+      AtomicReference<Throwable> serverActivationFailed = new AtomicReference<>();
       try {
          BrokerDTO broker = getBrokerDTO();
          ActiveMQSecurityManager securityManager = SecurityManagerFactory.create(broker.security);
@@ -110,8 +111,7 @@ public class Run extends LockAbstract {
         server = BrokerFactory.createServer(broker.server, securityManager, activateCallback);
 
         server.createComponents();
-        AtomicBoolean serverActivationFailed = new AtomicBoolean(false);
-        server.getServer().registerActivationFailureListener(exception -> serverActivationFailed.set(true));
+        server.getServer().registerActivationFailureListener(exception -> serverActivationFailed.set(exception));
        server.start();
        server.getServer().addExternalComponent(managementContext, false);
 
@@ -126,14 +126,16 @@ public class Run extends LockAbstract {
           server.getServer().addExternalComponent(component, true);
           assert component.isStarted();
        }
 
-       if (serverActivationFailed.get()) {
-          stop();
-       }
     } catch (Throwable t) {
        t.printStackTrace();
-       stop();
+       serverActivationFailed.set(t);
     }
 
+    if (serverActivationFailed.get() != null) {
+       stop();
+       return serverActivationFailed.get();
+    }
+
     return new Pair<>(managementContext, server.getServer());
  }
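Note on the Run changes above: the command now latches the first activation failure as the Throwable itself (via AtomicReference) and moves the stop-and-return check outside the try/catch, so listener-reported and thrown failures take the same exit path. A minimal sketch of the same pattern, with a hypothetical ServerStub standing in for the Artemis types:

import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;

final class ActivationFailureLatchSketch {

   // hypothetical stand-in for the broker wrapper used in Run
   interface ServerStub {
      void registerActivationFailureListener(Consumer<Throwable> listener);
      void start() throws Exception;
      void stop();
   }

   static Object startAndReport(ServerStub server) {
      AtomicReference<Throwable> failed = new AtomicReference<>();
      try {
         server.registerActivationFailureListener(failed::set); // keep the cause, not a boolean
         server.start();
      } catch (Throwable t) {
         failed.set(t);
      }
      if (failed.get() != null) {
         server.stop();
         return failed.get(); // caller can inspect the actual failure
      }
      return "started";
   }
}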
@@ -2851,4 +2851,12 @@ public interface AuditLogger extends BasicLogger {
    @LogMessage(level = Logger.Level.INFO)
    @Message(id = 601748, value = "User {0} is getting max retry interval on target resource: {1} {2}", format = Message.Format.MESSAGE_FORMAT)
    void getMaxRetryInterval(String user, Object source, Object... args);
+
+   static void getActivationSequence(Object source) {
+      BASE_LOGGER.getActivationSequence(getCaller(), source);
+   }
+
+   @LogMessage(level = Logger.Level.INFO)
+   @Message(id = 601749, value = "User {0} is getting activation sequence on target resource: {1} {2}", format = Message.Format.MESSAGE_FORMAT)
+   void getActivationSequence(String user, Object source, Object... args);
 }
@@ -321,6 +321,15 @@ public interface ActiveMQServerControl {
    @Attribute(desc = "Node ID of this server")
    String getNodeID();
 
+
+   /**
+    * Returns the current activation sequence number of this server.
+    * <br>
+    * When replicated, peers may coordinate activation with this monotonic sequence.
+    */
+   @Attribute(desc = "Activation sequence of this server instance")
+   long getActivationSequence();
+
    /**
     * Returns the management notification address of this server.
     * <br>
@@ -44,6 +44,12 @@
       <dependency>
          <groupId>org.apache.zookeeper</groupId>
          <artifactId>zookeeper</artifactId>
+         <exclusions>
+            <exclusion>
+               <groupId>org.slf4j</groupId>
+               <artifactId>slf4j-log4j12</artifactId>
+            </exclusion>
+         </exclusions>
       </dependency>
       <dependency>
          <groupId>org.apache.curator</groupId>
@@ -59,6 +65,10 @@
          <groupId>org.jboss.logging</groupId>
          <artifactId>jboss-logging</artifactId>
       </dependency>
+      <dependency>
+         <groupId>org.jboss.slf4j</groupId>
+         <artifactId>slf4j-jboss-logmanager</artifactId>
+      </dependency>
       <dependency>
          <groupId>org.apache.activemq</groupId>
          <artifactId>artemis-commons</artifactId>
@@ -18,16 +18,18 @@ package org.apache.activemq.artemis.quorum.file;
 
 import java.io.File;
 import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Objects;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
 
 import org.apache.activemq.artemis.quorum.DistributedLock;
 import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
 import org.apache.activemq.artemis.quorum.MutableLong;
+import org.apache.activemq.artemis.quorum.UnavailableStateException;
 
 /**
  * This is an implementation suitable to be used just on unit tests and it won't attempt
@@ -127,8 +129,55 @@ public class FileBasedPrimitiveManager implements DistributedPrimitiveManager {
    }
 
    @Override
-   public MutableLong getMutableLong(String mutableLongId) throws InterruptedException, ExecutionException, TimeoutException {
-      // TODO
-      return null;
+   public MutableLong getMutableLong(final String mutableLongId) throws ExecutionException {
+      // use a lock file - but with a prefix
+      final FileDistributedLock fileDistributedLock = (FileDistributedLock) getDistributedLock("ML:" + mutableLongId);
+      return new MutableLong() {
+         @Override
+         public String getMutableLongId() {
+            return mutableLongId;
+         }
+
+         @Override
+         public long get() throws UnavailableStateException {
+            try {
+               return readLong(fileDistributedLock);
+            } catch (IOException e) {
+               throw new UnavailableStateException(e);
+            }
+         }
+
+         @Override
+         public void set(long value) throws UnavailableStateException {
+            try {
+               writeLong(fileDistributedLock, value);
+            } catch (IOException e) {
+               throw new UnavailableStateException(e);
+            }
+         }
+
+         @Override
+         public void close() {
+            fileDistributedLock.close();
+         }
+      };
+   }
+
+   private void writeLong(FileDistributedLock fileDistributedLock, long value) throws IOException {
+      ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
+      buffer.putLong(value);
+      buffer.flip();
+      if (fileDistributedLock.getChannel().position(0).write(buffer) == Long.BYTES) {
+         fileDistributedLock.getChannel().force(false);
+      }
+   }
+
+   private long readLong(FileDistributedLock fileDistributedLock) throws IOException {
+      ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
+      if (fileDistributedLock.getChannel().position(0).read(buffer, 0) != Long.BYTES) {
+         return 0;
+      }
+      buffer.flip();
+      return buffer.getLong();
    }
 }
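The getMutableLong implementation above reuses the file-lock machinery: each mutable long lives in its own "ML:"-prefixed lock file and is stored as one big-endian long at offset 0, forced to disk on write. A hedged usage sketch (assumes a started FileBasedPrimitiveManager instance; the id "sequence" is illustrative):

import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.MutableLong;

final class MutableLongUsageSketch {

   // the manager maps the "sequence" id to an "ML:sequence" lock file
   static void bump(DistributedPrimitiveManager manager) throws Exception {
      MutableLong sequence = manager.getMutableLong("sequence");
      long current = sequence.get();   // reads 0 until a value has been written
      sequence.set(current + 1);       // written and force()d through the file channel
      sequence.close();                // releases the backing lock file
   }
}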
@@ -138,4 +138,8 @@ final class FileDistributedLock implements DistributedLock {
    public void close() {
       close(true);
    }
+
+   public FileChannel getChannel() {
+      return channel;
+   }
 }
@@ -178,6 +178,7 @@ public abstract class DistributedLockTest {
       ownerManager.getDistributedLock("a").unlock();
       Assert.assertFalse(observerManager.getDistributedLock("a").isHeldByCaller());
       Assert.assertFalse(ownerManager.getDistributedLock("a").isHeldByCaller());
+      Assert.assertTrue(observerManager.getDistributedLock("a").tryLock());
    }
 
    @Test
@@ -34,13 +34,6 @@ public class ReplicationBackupPolicyConfiguration implements HAPolicyConfiguration {
 
    private long initialReplicationSyncTimeout = ActiveMQDefaultConfiguration.getDefaultInitialReplicationSyncTimeout();
 
-   private int voteRetries = ActiveMQDefaultConfiguration.getDefaultVoteRetries();
-
-   /**
-    * TODO: move if into {@link ActiveMQDefaultConfiguration} when the configuration is stable.
-    */
-   private long voteRetryWait = 2000;
-
    private long retryReplicationWait = ActiveMQDefaultConfiguration.getDefaultRetryReplicationWait();
 
    private DistributedPrimitiveManagerConfiguration distributedManagerConfiguration = null;
@@ -102,24 +95,6 @@ public class ReplicationBackupPolicyConfiguration implements HAPolicyConfiguration {
       return this;
    }
 
-   public int getVoteRetries() {
-      return voteRetries;
-   }
-
-   public ReplicationBackupPolicyConfiguration setVoteRetries(int voteRetries) {
-      this.voteRetries = voteRetries;
-      return this;
-   }
-
-   public ReplicationBackupPolicyConfiguration setVoteRetryWait(long voteRetryWait) {
-      this.voteRetryWait = voteRetryWait;
-      return this;
-   }
-
-   public long getVoteRetryWait() {
-      return voteRetryWait;
-   }
-
    public long getRetryReplicationWait() {
       return retryReplicationWait;
    }
@@ -21,25 +21,18 @@ import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
 
 public class ReplicationPrimaryPolicyConfiguration implements HAPolicyConfiguration {
 
-   private boolean checkForLiveServer = ActiveMQDefaultConfiguration.isDefaultCheckForLiveServer();
-
    private String groupName = null;
 
    private String clusterName = null;
 
    private long initialReplicationSyncTimeout = ActiveMQDefaultConfiguration.getDefaultInitialReplicationSyncTimeout();
 
-   private int voteRetries = ActiveMQDefaultConfiguration.getDefaultVoteRetries();
-
-   /**
-    * TODO: move if into {@link ActiveMQDefaultConfiguration} when the configuration is stable.
-    */
-   private long voteRetryWait = 2000;
-
    private Long retryReplicationWait = ActiveMQDefaultConfiguration.getDefaultRetryReplicationWait();
 
    private DistributedPrimitiveManagerConfiguration distributedManagerConfiguration = null;
 
+   private String coordinationId = null;
+
    public static ReplicationPrimaryPolicyConfiguration withDefault() {
       return new ReplicationPrimaryPolicyConfiguration();
    }
@@ -52,15 +45,6 @@ public class ReplicationPrimaryPolicyConfiguration implements HAPolicyConfiguration {
       return TYPE.PRIMARY;
    }
 
-   public boolean isCheckForLiveServer() {
-      return checkForLiveServer;
-   }
-
-   public ReplicationPrimaryPolicyConfiguration setCheckForLiveServer(boolean checkForLiveServer) {
-      this.checkForLiveServer = checkForLiveServer;
-      return this;
-   }
-
    public String getGroupName() {
       return groupName;
    }
@@ -88,24 +72,6 @@ public class ReplicationPrimaryPolicyConfiguration implements HAPolicyConfiguration {
       return this;
    }
 
-   public int getVoteRetries() {
-      return voteRetries;
-   }
-
-   public ReplicationPrimaryPolicyConfiguration setVoteRetries(int voteRetries) {
-      this.voteRetries = voteRetries;
-      return this;
-   }
-
-   public ReplicationPrimaryPolicyConfiguration setVoteRetryWait(long voteRetryWait) {
-      this.voteRetryWait = voteRetryWait;
-      return this;
-   }
-
-   public long getVoteRetryWait() {
-      return voteRetryWait;
-   }
-
    public void setRetryReplicationWait(Long retryReplicationWait) {
       this.retryReplicationWait = retryReplicationWait;
    }
@@ -122,4 +88,27 @@ public class ReplicationPrimaryPolicyConfiguration implements HAPolicyConfiguration {
    public DistributedPrimitiveManagerConfiguration getDistributedManagerConfiguration() {
       return distributedManagerConfiguration;
    }
+
+   public String getCoordinationId() {
+      return coordinationId;
+   }
+
+   public void setCoordinationId(String newCoordinationId) {
+      if (newCoordinationId == null) {
+         return;
+      }
+      final int len = newCoordinationId.length();
+      if (len >= 16) {
+         this.coordinationId = newCoordinationId.substring(0, 16);
+      } else if (len % 2 != 0) {
+         // must be even for conversion to uuid, extend to next even
+         this.coordinationId = newCoordinationId + "+";
+      } else if (len > 0) {
+         // run with it
+         this.coordinationId = newCoordinationId;
+      }
+      if (this.coordinationId != null) {
+         this.coordinationId = this.coordinationId.replace('-', '.');
+      }
+   }
 }
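setCoordinationId above normalizes the configured id so it can later serve as a 16-character node-id-like token: ids of 16 or more characters are truncated, odd-length ids are padded with '+' so they stay convertible to a uuid, and '-' is mapped to '.'. An illustrative check of that behavior (run with -ea; values are examples only, and the import path is an assumption):

import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;

final class CoordinationIdNormalizationSketch {
   public static void main(String[] args) {
      ReplicationPrimaryPolicyConfiguration config = ReplicationPrimaryPolicyConfiguration.withDefault();

      config.setCoordinationId("peer-a");                  // even length: kept, '-' becomes '.'
      assert "peer.a".equals(config.getCoordinationId());

      config.setCoordinationId("abcde");                   // odd length: padded to even
      assert "abcde+".equals(config.getCoordinationId());

      config.setCoordinationId("0123456789abcdefXYZ");     // >= 16 chars: truncated to 16
      assert "0123456789abcdef".equals(config.getCoordinationId());
   }
}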
@@ -1715,22 +1715,18 @@ public final class FileConfigurationParser extends XMLConfigurationUtil {
    private ReplicationPrimaryPolicyConfiguration createReplicationPrimaryHaPolicy(Element policyNode, Configuration config) {
       ReplicationPrimaryPolicyConfiguration configuration = ReplicationPrimaryPolicyConfiguration.withDefault();
 
-      configuration.setCheckForLiveServer(getBoolean(policyNode, "check-for-live-server", configuration.isCheckForLiveServer()));
-
       configuration.setGroupName(getString(policyNode, "group-name", configuration.getGroupName(), Validators.NO_CHECK));
 
       configuration.setClusterName(getString(policyNode, "cluster-name", configuration.getClusterName(), Validators.NO_CHECK));
 
       configuration.setInitialReplicationSyncTimeout(getLong(policyNode, "initial-replication-sync-timeout", configuration.getInitialReplicationSyncTimeout(), Validators.GT_ZERO));
 
-      configuration.setVoteRetries(getInteger(policyNode, "vote-retries", configuration.getVoteRetries(), Validators.MINUS_ONE_OR_GE_ZERO));
-
-      configuration.setVoteRetryWait(getLong(policyNode, "vote-retry-wait", configuration.getVoteRetryWait(), Validators.GT_ZERO));
-
-      configuration.setRetryReplicationWait(getLong(policyNode, "retry-replication-wait", configuration.getVoteRetryWait(), Validators.GT_ZERO));
+      configuration.setRetryReplicationWait(getLong(policyNode, "retry-replication-wait", configuration.getRetryReplicationWait(), Validators.GT_ZERO));
 
       configuration.setDistributedManagerConfiguration(createDistributedPrimitiveManagerConfiguration(policyNode, config));
 
+      configuration.setCoordinationId(getString(policyNode, "coordination-id", configuration.getCoordinationId(), Validators.NOT_NULL_OR_EMPTY));
+
       return configuration;
    }
@@ -1748,11 +1744,7 @@ public final class FileConfigurationParser extends XMLConfigurationUtil {
 
       configuration.setMaxSavedReplicatedJournalsSize(getInteger(policyNode, "max-saved-replicated-journals-size", configuration.getMaxSavedReplicatedJournalsSize(), Validators.MINUS_ONE_OR_GE_ZERO));
 
-      configuration.setVoteRetries(getInteger(policyNode, "vote-retries", configuration.getVoteRetries(), Validators.MINUS_ONE_OR_GE_ZERO));
+      configuration.setRetryReplicationWait(getLong(policyNode, "retry-replication-wait", configuration.getRetryReplicationWait(), Validators.GT_ZERO));
 
-      configuration.setVoteRetryWait(getLong(policyNode, "vote-retry-wait", configuration.getVoteRetryWait(), Validators.GT_ZERO));
-
-      configuration.setRetryReplicationWait(getLong(policyNode, "retry-replication-wait", configuration.getVoteRetryWait(), Validators.GT_ZERO));
-
       configuration.setDistributedManagerConfiguration(createDistributedPrimitiveManagerConfiguration(policyNode, config));
 
@@ -4204,6 +4204,17 @@ public class ActiveMQServerControlImpl extends AbstractControl implements ActiveMQServerControl {
       return server.getNodeID() == null ? null : server.getNodeID().toString();
    }
 
+   @Override
+   public long getActivationSequence() {
+      if (AuditLogger.isBaseLoggingEnabled()) {
+         AuditLogger.getActivationSequence(this.server);
+      }
+      if (server.getNodeManager() != null) {
+         return server.getNodeManager().getNodeActivationSequence();
+      }
+      return 0;
+   }
+
    @Override
    public String getManagementNotificationAddress() {
       if (AuditLogger.isBaseLoggingEnabled()) {
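With the implementation above, the activation sequence is exposed as a read-only JMX attribute. A sketch of reading it remotely follows; the service URL and broker ObjectName are assumptions that depend on the broker's management configuration:

import javax.management.MBeanServerConnection;
import javax.management.ObjectName;
import javax.management.remote.JMXConnector;
import javax.management.remote.JMXConnectorFactory;
import javax.management.remote.JMXServiceURL;

final class ActivationSequenceJmxSketch {
   public static void main(String[] args) throws Exception {
      // assumed local JMX endpoint and broker name
      JMXServiceURL url = new JMXServiceURL("service:jmx:rmi:///jndi/rmi://localhost:1099/jmxrmi");
      try (JMXConnector connector = JMXConnectorFactory.connect(url)) {
         MBeanServerConnection connection = connector.getMBeanServerConnection();
         ObjectName broker = new ObjectName("org.apache.activemq.artemis:broker=\"mybroker\"");
         // "ActivationSequence" is the attribute name backing getActivationSequence()
         Long sequence = (Long) connection.getAttribute(broker, "ActivationSequence");
         System.out.println("activation sequence = " + sequence);
      }
   }
}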
@@ -43,7 +43,8 @@ public class ReplicationStartSyncMessage extends PacketImpl {
    public enum SyncDataType {
       JournalBindings(AbstractJournalStorageManager.JournalContent.BINDINGS.typeByte),
       JournalMessages(AbstractJournalStorageManager.JournalContent.MESSAGES.typeByte),
-      LargeMessages((byte) 2);
+      LargeMessages((byte) 2),
+      ActivationSequence((byte) 3);
 
       private byte code;
 
@@ -62,6 +63,9 @@ public class ReplicationStartSyncMessage extends PacketImpl {
            return JournalMessages;
         if (code == LargeMessages.code)
            return LargeMessages;
+        if (code == ActivationSequence.code)
+           return ActivationSequence;
+
         throw new InvalidParameterException("invalid byte: " + code);
      }
    }
@@ -80,6 +84,14 @@ public class ReplicationStartSyncMessage extends PacketImpl {
       nodeID = ""; // this value will be ignored
    }
 
+
+   public ReplicationStartSyncMessage(String nodeID, long nodeDataVersion) {
+      this(nodeID);
+      ids = new long[1];
+      ids[0] = nodeDataVersion;
+      dataType = SyncDataType.ActivationSequence;
+   }
+
    public ReplicationStartSyncMessage(String nodeID) {
      this();
      synchronizationIsFinished = true;
@@ -118,10 +130,6 @@ public class ReplicationStartSyncMessage extends PacketImpl {
             DataConstants.SIZE_BOOLEAN + // buffer.writeBoolean(allowsAutoFailBack);
             nodeID.length() * 3; // buffer.writeString(nodeID); -- an estimate
 
-
-      if (synchronizationIsFinished) {
-         return size;
-      }
       size += DataConstants.SIZE_BYTE + // buffer.writeByte(dataType.code);
              DataConstants.SIZE_INT + // buffer.writeInt(ids.length);
              DataConstants.SIZE_LONG * ids.length; // the write loop
@@ -135,8 +143,6 @@ public class ReplicationStartSyncMessage extends PacketImpl {
       buffer.writeBoolean(synchronizationIsFinished);
       buffer.writeBoolean(allowsAutoFailBack);
       buffer.writeString(nodeID);
-      if (synchronizationIsFinished)
-         return;
       buffer.writeByte(dataType.code);
       buffer.writeInt(ids.length);
       for (long id : ids) {
@@ -149,9 +155,6 @@ public class ReplicationStartSyncMessage extends PacketImpl {
       synchronizationIsFinished = buffer.readBoolean();
       allowsAutoFailBack = buffer.readBoolean();
       nodeID = buffer.readString();
-      if (synchronizationIsFinished) {
-         return;
-      }
       dataType = SyncDataType.getDataType(buffer.readByte());
       int length = buffer.readInt();
       ids = new long[length];
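Taken together, the ReplicationStartSyncMessage changes let the live piggyback its activation sequence on the final synchronization packet: the new (nodeID, nodeDataVersion) constructor stores the sequence in ids[0] under the new ActivationSequence data type, and encode/decode now always write and read the data-type section. A small illustrative construction (values are examples):

final class StartSyncMessageSketch {
   static ReplicationStartSyncMessage finishedWithSequence(String nodeID, long activationSequence) {
      // wire content: synchronizationIsFinished=true, dataType=ActivationSequence,
      // ids.length=1, ids[0]=activationSequence; the backup persists it via
      // NodeManager#writeNodeActivationSequence(...)
      return new ReplicationStartSyncMessage(nodeID, activationSequence);
   }
}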
@@ -483,7 +483,7 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQComponent {
            logger.trace("Backup is synchronized / BACKUP-SYNC-DONE");
         }
 
-        ActiveMQServerLogger.LOGGER.backupServerSynched(server);
+        ActiveMQServerLogger.LOGGER.backupServerSynchronized(server, liveID);
         return;
      }
 
@@ -560,6 +560,11 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQComponent {
         return replicationResponseMessage;
 
      if (packet.isSynchronizationFinished()) {
+        if (packet.getFileIds() != null && packet.getFileIds().length == 1) {
+           // this is the version sequence of the data we are replicating
+           // verified if we activate with this data
+           server.getNodeManager().writeNodeActivationSequence(packet.getFileIds()[0]);
+        }
        finishSynchronization(packet.getNodeID());
        replicationResponseMessage.setSynchronizationIsFinishedAcknowledgement(true);
        return replicationResponseMessage;
@@ -821,7 +821,7 @@ public final class ReplicationManager implements ActiveMQComponent {
      }
 
      synchronizationIsFinishedAcknowledgement.countUp();
-     sendReplicatePacket(new ReplicationStartSyncMessage(nodeID));
+     sendReplicatePacket(new ReplicationStartSyncMessage(nodeID, server.getNodeManager().getNodeActivationSequence()));
      try {
         if (!synchronizationIsFinishedAcknowledgement.await(initialReplicationSyncTimeout)) {
            ActiveMQReplicationTimeooutException exception = ActiveMQMessageBundle.BUNDLE.replicationSynchronizationTimeout(initialReplicationSyncTimeout);
@@ -190,8 +190,8 @@ public interface ActiveMQServerLogger extends BasicLogger {
    void errorStoppingConnectorService(@Cause Throwable e, String name);
 
    @LogMessage(level = Logger.Level.INFO)
-   @Message(id = 221024, value = "Backup server {0} is synchronized with live-server.", format = Message.Format.MESSAGE_FORMAT)
-   void backupServerSynched(ActiveMQServerImpl server);
+   @Message(id = 221024, value = "Backup server {0} is synchronized with live server, nodeID={1}.", format = Message.Format.MESSAGE_FORMAT)
+   void backupServerSynchronized(ActiveMQServerImpl server, String liveID);
 
    @LogMessage(level = Logger.Level.INFO)
    @Message(id = 221025, value = "Replication: sending {0} (size={1}) to replica.", format = Message.Format.MESSAGE_FORMAT)
@@ -39,6 +39,7 @@ public abstract class NodeManager implements ActiveMQComponent {
    private UUID uuid;
    private boolean isStarted = false;
    private final Set<FileLockNodeManager.LockListener> lockListeners;
+   protected long nodeActivationSequence; // local version of a coordinated sequence, tracking state transitions of ownership
 
    public NodeManager(final boolean replicatedBackup) {
       this.replicatedBackup = replicatedBackup;
@@ -79,18 +80,30 @@ public abstract class NodeManager implements ActiveMQComponent {
      }
    }
 
-   public long readDataVersion() throws NodeManagerException {
+   public long readNodeActivationSequence() throws NodeManagerException {
      // TODO make it abstract
      throw new UnsupportedOperationException("TODO");
    }
 
-   public void writeDataVersion(long version) throws NodeManagerException {
+   public void writeNodeActivationSequence(long version) throws NodeManagerException {
      // TODO make it abstract
      throw new UnsupportedOperationException("TODO");
    }
 
    public abstract SimpleString readNodeId() throws NodeManagerException;
 
+   public long getNodeActivationSequence() {
+      synchronized (nodeIDGuard) {
+         return nodeActivationSequence;
+      }
+   }
+
+   public void setNodeActivationSequence(long activationSequence) {
+      synchronized (nodeIDGuard) {
+         nodeActivationSequence = activationSequence;
+      }
+   }
+
    public UUID getUUID() {
       synchronized (nodeIDGuard) {
          return uuid;
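The NodeManager additions above keep an in-memory copy of the coordinated activation sequence, guarded by the same nodeIDGuard monitor as the node id. A hedged sketch of how an activation path might use it together with the persistent form (the orchestration shown is illustrative, not the actual policy code):

import org.apache.activemq.artemis.core.server.NodeManager;

final class ActivationSequenceSketch {

   // illustrative: persist the coordinated sequence, then rely on the in-memory copy
   static void onCoordinatedActivation(NodeManager nodeManager, long coordinatedSequence) {
      // FileBasedNodeManager#writeNodeActivationSequence also calls
      // setNodeActivationSequence, keeping disk and memory in step
      nodeManager.writeNodeActivationSequence(coordinatedSequence);
      long local = nodeManager.getNodeActivationSequence();
      assert local == coordinatedSequence;
   }
}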
@@ -32,10 +32,8 @@ public class ReplicationBackupPolicy implements HAPolicy<ReplicationBackupActivation> {
    private final String groupName;
    private final String clusterName;
    private final int maxSavedReplicatedJournalsSize;
-   private final int voteRetries;
-   private final long voteRetryWait;
    private final long retryReplicationWait;
-   private final DistributedPrimitiveManagerConfiguration distributedManagerConfiguration;
+   private final DistributedPrimitiveManagerConfiguration managerConfiguration;
    private final boolean tryFailback;
 
    private ReplicationBackupPolicy(ReplicationBackupPolicyConfiguration configuration,
@@ -44,10 +42,8 @@ public class ReplicationBackupPolicy implements HAPolicy<ReplicationBackupActivation> {
       this.clusterName = configuration.getClusterName();
       this.maxSavedReplicatedJournalsSize = configuration.getMaxSavedReplicatedJournalsSize();
       this.groupName = configuration.getGroupName();
-      this.voteRetries = configuration.getVoteRetries();
-      this.voteRetryWait = configuration.getVoteRetryWait();
       this.retryReplicationWait = configuration.getRetryReplicationWait();
-      this.distributedManagerConfiguration = configuration.getDistributedManagerConfiguration();
+      this.managerConfiguration = configuration.getDistributedManagerConfiguration();
       this.tryFailback = true;
       this.livePolicy = livePolicy;
    }
@@ -56,10 +52,8 @@ public class ReplicationBackupPolicy implements HAPolicy<ReplicationBackupActivation> {
       this.clusterName = configuration.getClusterName();
       this.maxSavedReplicatedJournalsSize = configuration.getMaxSavedReplicatedJournalsSize();
       this.groupName = configuration.getGroupName();
-      this.voteRetries = configuration.getVoteRetries();
-      this.voteRetryWait = configuration.getVoteRetryWait();
       this.retryReplicationWait = configuration.getRetryReplicationWait();
-      this.distributedManagerConfiguration = configuration.getDistributedManagerConfiguration();
+      this.managerConfiguration = configuration.getDistributedManagerConfiguration();
       this.tryFailback = false;
       livePolicy = ReplicationPrimaryPolicy.failoverPolicy(
          configuration.getInitialReplicationSyncTimeout(),
@@ -84,16 +78,12 @@ public class ReplicationBackupPolicy implements HAPolicy<ReplicationBackupActivation> {
    /**
     * It creates a companion backup policy for a natural-born primary: it would cause the broker to try failback.
    */
-   static ReplicationBackupPolicy failback(int voteRetries,
-                                           long voteRetryWait,
-                                           long retryReplicationWait,
+   static ReplicationBackupPolicy failback(long retryReplicationWait,
                                            String clusterName,
                                            String groupName,
                                            ReplicationPrimaryPolicy livePolicy,
                                            DistributedPrimitiveManagerConfiguration distributedManagerConfiguration) {
      return new ReplicationBackupPolicy(ReplicationBackupPolicyConfiguration.withDefault()
-        .setVoteRetries(voteRetries)
-        .setVoteRetryWait(voteRetryWait)
        .setRetryReplicationWait(retryReplicationWait)
        .setClusterName(clusterName)
        .setGroupName(groupName)
@@ -106,9 +96,8 @@ public class ReplicationBackupPolicy implements HAPolicy<ReplicationBackupActivation> {
                                               boolean wasLive,
                                               Map<String, Object> activationParams,
                                               IOCriticalErrorListener shutdownOnCriticalIO) throws Exception {
-      return new ReplicationBackupActivation(server, wasLive, DistributedPrimitiveManager.newInstanceOf(
-         distributedManagerConfiguration.getClassName(),
-         distributedManagerConfiguration.getProperties()), this);
+      return new ReplicationBackupActivation(server, DistributedPrimitiveManager.newInstanceOf(
+         managerConfiguration.getClassName(), managerConfiguration.getProperties()), this);
    }
 
    @Override
@@ -157,14 +146,6 @@ public class ReplicationBackupPolicy implements HAPolicy<ReplicationBackupActivation> {
       return maxSavedReplicatedJournalsSize;
    }
 
-   public int getVoteRetries() {
-      return voteRetries;
-   }
-
-   public long getVoteRetryWait() {
-      return voteRetryWait;
-   }
-
    public long getRetryReplicationWait() {
       return retryReplicationWait;
    }
@@ -31,10 +31,10 @@ public class ReplicationPrimaryPolicy implements HAPolicy<ReplicationPrimaryActivation> {
    private final ReplicationBackupPolicy backupPolicy;
    private final String clusterName;
    private final String groupName;
-   private final boolean checkForLiveServer;
    private final long initialReplicationSyncTimeout;
    private final DistributedPrimitiveManagerConfiguration distributedManagerConfiguration;
    private final boolean allowAutoFailBack;
+   private final String coordinationId;
 
    private ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration configuration,
                                     ReplicationBackupPolicy backupPolicy,
@@ -42,9 +42,9 @@ public class ReplicationPrimaryPolicy implements HAPolicy<ReplicationPrimaryActivation> {
       Objects.requireNonNull(backupPolicy);
       clusterName = configuration.getClusterName();
       groupName = configuration.getGroupName();
-      checkForLiveServer = configuration.isCheckForLiveServer();
       initialReplicationSyncTimeout = configuration.getInitialReplicationSyncTimeout();
       distributedManagerConfiguration = configuration.getDistributedManagerConfiguration();
+      coordinationId = configuration.getCoordinationId();
       this.allowAutoFailBack = allowAutoFailBack;
       this.backupPolicy = backupPolicy;
    }
@@ -52,12 +52,11 @@ public class ReplicationPrimaryPolicy implements HAPolicy<ReplicationPrimaryActivation> {
    private ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration config) {
       clusterName = config.getClusterName();
       groupName = config.getGroupName();
-      checkForLiveServer = config.isCheckForLiveServer();
+      coordinationId = config.getCoordinationId();
       initialReplicationSyncTimeout = config.getInitialReplicationSyncTimeout();
       distributedManagerConfiguration = config.getDistributedManagerConfiguration();
       this.allowAutoFailBack = false;
-      backupPolicy = ReplicationBackupPolicy.failback(config.getVoteRetries(), config.getVoteRetryWait(),
-                                                      config.getRetryReplicationWait(), config.getClusterName(),
+      backupPolicy = ReplicationBackupPolicy.failback(config.getRetryReplicationWait(), config.getClusterName(),
                                                       config.getGroupName(), this,
                                                       config.getDistributedManagerConfiguration());
    }
@@ -73,7 +72,6 @@ public class ReplicationPrimaryPolicy implements HAPolicy<ReplicationPrimaryActivation> {
                                               boolean allowAutoFailback,
                                               DistributedPrimitiveManagerConfiguration distributedManagerConfiguration) {
      return new ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration.withDefault()
-        .setCheckForLiveServer(false)
        .setInitialReplicationSyncTimeout(initialReplicationSyncTimeout)
        .setGroupName(groupName)
        .setClusterName(clusterName)
@@ -139,10 +137,6 @@ public class ReplicationPrimaryPolicy implements HAPolicy<ReplicationPrimaryActivation> {
       return null;
    }
 
-   public boolean isCheckForLiveServer() {
-      return checkForLiveServer;
-   }
-
    public boolean isAllowAutoFailBack() {
       return allowAutoFailBack;
    }
@@ -163,4 +157,8 @@ public class ReplicationPrimaryPolicy implements HAPolicy<ReplicationPrimaryActivation> {
    public boolean useQuorumManager() {
       return false;
    }
+
+   public String getCoordinationId() {
+      return coordinationId;
+   }
 }
@@ -659,7 +659,7 @@ public class ActiveMQServerImpl implements ActiveMQServer {
                afterActivationCreated.run();
             } catch (Throwable e) {
                logger.warn(e.getMessage(), e); // just debug, this is not supposed to happend, and if it does
-               // it will be embedeed code from tests
+               // it will be embedded code from tests
             }
             afterActivationCreated = null;
          }
@@ -2877,6 +2877,8 @@ public class ActiveMQServerImpl implements ActiveMQServer {
    public String toString() {
       if (identity != null) {
          return "ActiveMQServerImpl::" + identity;
+      } else if (configuration != null && configuration.getName() != null) {
+         return "ActiveMQServerImpl::" + "name=" + configuration.getName();
       }
       return "ActiveMQServerImpl::" + (nodeManager != null ? "serverUUID=" + nodeManager.getUUID() : "");
    }
@@ -1,160 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.activemq.artemis.core.server.impl;
-
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.activemq.artemis.api.core.ActiveMQException;
-import org.apache.activemq.artemis.api.core.ActiveMQExceptionType;
-import org.apache.activemq.artemis.api.core.DiscoveryGroupConfiguration;
-import org.apache.activemq.artemis.api.core.TransportConfiguration;
-import org.apache.activemq.artemis.api.core.client.ActiveMQClient;
-import org.apache.activemq.artemis.api.core.client.ClientSession;
-import org.apache.activemq.artemis.api.core.client.ClientSessionFactory;
-import org.apache.activemq.artemis.api.core.client.ClusterTopologyListener;
-import org.apache.activemq.artemis.api.core.client.ServerLocator;
-import org.apache.activemq.artemis.api.core.client.TopologyMember;
-import org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal;
-import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal;
-import org.apache.activemq.artemis.core.config.ClusterConnectionConfiguration;
-import org.apache.activemq.artemis.core.config.Configuration;
-import org.apache.activemq.artemis.core.config.ConfigurationUtils;
-import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle;
-import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
-import org.jboss.logging.Logger;
-
-/**
- * This class contains some utils to allow a broker to check presence and role of another broker in the cluster.
- */
-final class ClusterTopologySearch {
-
-   private ClusterTopologySearch() {
-
-   }
-
-   /**
-    * Determines whether there is a live server already running with nodeID.<br>
-    * This search isn't filtering the caller broker transport and is meant to be used
-    * when the broker acceptors aren't running yet.
-    */
-   public static boolean searchActiveLiveNodeId(String clusterName,
-                                                String nodeId,
-                                                long timeout,
-                                                TimeUnit unit,
-                                                Configuration serverConfiguration) throws ActiveMQException {
-      if (serverConfiguration.getClusterConfigurations().isEmpty())
-         return false;
-      final ClusterConnectionConfiguration clusterConnectionConfiguration = ConfigurationUtils.getReplicationClusterConfiguration(serverConfiguration, clusterName);
-
-      final LiveNodeIdListener liveNodeIdListener = new LiveNodeIdListener(nodeId, serverConfiguration.getClusterUser(), serverConfiguration.getClusterPassword());
-
-      try (ServerLocatorInternal locator = createLocator(serverConfiguration, clusterConnectionConfiguration)) {
-         // if would like to filter out a transport configuration:
-         // locator.setClusterTransportConfiguration(callerBrokerTransportConfiguration)
-         locator.addClusterTopologyListener(liveNodeIdListener);
-         locator.setReconnectAttempts(0);
-         try (ClientSessionFactoryInternal ignored = locator.connectNoWarnings()) {
-            return liveNodeIdListener.awaitNodePresent(timeout, unit);
-         } catch (Exception notConnected) {
-            if (!(notConnected instanceof ActiveMQException) || ActiveMQExceptionType.INTERNAL_ERROR.equals(((ActiveMQException) notConnected).getType())) {
-               // report all exceptions that aren't ActiveMQException and all INTERNAL_ERRORs
-               ActiveMQServerLogger.LOGGER.failedConnectingToCluster(notConnected);
-            }
-            return false;
-         }
-      }
-   }
-
-   private static final class LiveNodeIdListener implements ClusterTopologyListener {
-
-      private static final Logger logger = Logger.getLogger(LiveNodeIdListener.class);
-      private final String nodeId;
-      private final String user;
-      private final String password;
-      private final CountDownLatch searchCompleted;
-      private boolean isNodePresent = false;
-
-      LiveNodeIdListener(String nodeId, String user, String password) {
-         this.nodeId = nodeId;
-         this.user = user;
-         this.password = password;
-         this.searchCompleted = new CountDownLatch(1);
-      }
-
-      @Override
-      public void nodeUP(TopologyMember topologyMember, boolean last) {
-         boolean isOurNodeId = nodeId != null && nodeId.equals(topologyMember.getNodeId());
-         if (isOurNodeId && isActive(topologyMember.getLive())) {
-            isNodePresent = true;
-         }
-         if (isOurNodeId || last) {
-            searchCompleted.countDown();
-         }
-      }
-
-      public boolean awaitNodePresent(long timeout, TimeUnit unit) throws InterruptedException {
-         searchCompleted.await(timeout, unit);
-         return isNodePresent;
-      }
-
-      /**
-       * In a cluster of replicated live/backup pairs if a backup crashes and then its live crashes the cluster will
-       * retain the topology information of the live such that when the live server restarts it will check the
-       * cluster to see if its nodeID is present (which it will be) and then it will activate as a backup rather than
-       * a live. To prevent this situation an additional check is necessary to see if the server with the matching
-       * nodeID is actually active or not which is done by attempting to make a connection to it.
-       *
-       * @param transportConfiguration
-       * @return
-       */
-      private boolean isActive(TransportConfiguration transportConfiguration) {
-         try (ServerLocator serverLocator = ActiveMQClient.createServerLocator(false, transportConfiguration);
-              ClientSessionFactory clientSessionFactory = serverLocator.createSessionFactory();
-              ClientSession clientSession = clientSessionFactory.createSession(user, password, false, false, false, false, 0)) {
-            return true;
-         } catch (Exception e) {
-            logger.debug("isActive check failed", e);
-            return false;
-         }
-      }
-
-      @Override
-      public void nodeDown(long eventUID, String nodeID) {
-         // no-op
-      }
-   }
-
-   private static ServerLocatorInternal createLocator(Configuration configuration,
-                                                      ClusterConnectionConfiguration config) throws ActiveMQException {
-      final ServerLocatorInternal locator;
-      if (config.getDiscoveryGroupName() != null) {
-         DiscoveryGroupConfiguration dg = configuration.getDiscoveryGroupConfigurations().get(config.getDiscoveryGroupName());
-
-         if (dg == null) {
-            throw ActiveMQMessageBundle.BUNDLE.noDiscoveryGroupFound(null);
-         }
-         locator = (ServerLocatorInternal) ActiveMQClient.createServerLocatorWithHA(dg);
-      } else {
-         TransportConfiguration[] tcConfigs = config.getStaticConnectors() != null ? configuration.getTransportConfigurations(config.getStaticConnectors()) : null;
-
-         locator = (ServerLocatorInternal) ActiveMQClient.createServerLocatorWithHA(tcConfigs);
-      }
-      return locator;
-   }
-
-}
@@ -28,7 +28,7 @@ import org.apache.activemq.artemis.core.server.NodeManager;
 import org.apache.activemq.artemis.utils.UUID;
 import org.apache.activemq.artemis.utils.UUIDGenerator;
 
-import static java.nio.file.StandardOpenOption.CREATE_NEW;
+import static java.nio.file.StandardOpenOption.CREATE;
 import static java.nio.file.StandardOpenOption.READ;
 import static java.nio.file.StandardOpenOption.WRITE;
 
@@ -36,33 +36,36 @@ public abstract class FileBasedNodeManager extends NodeManager {
 
    protected static final byte FIRST_TIME_START = '0';
    public static final String SERVER_LOCK_NAME = "server.lock";
-   public static final String DATA_VERSION_NAME = "server.data.version";
+   public static final String SERVER_ACTIVATION_SEQUENCE_NAME = "server.activation.sequence";
    private static final String ACCESS_MODE = "rw";
    private final File directory;
    protected FileChannel channel;
-   protected FileChannel dataVersionChannel;
+   protected FileChannel activationSequenceChannel;
 
    public FileBasedNodeManager(boolean replicatedBackup, File directory) {
       super(replicatedBackup);
      this.directory = directory;
+      if (directory != null) {
+         directory.mkdirs();
+      }
   }
 
-   protected void useDataVersionChannel() throws IOException {
-      if (dataVersionChannel != null) {
+   protected void useActivationSequenceChannel() throws IOException {
+      if (activationSequenceChannel != null) {
         return;
      }
-      dataVersionChannel = FileChannel.open(newFile(DATA_VERSION_NAME).toPath(), READ, WRITE, CREATE_NEW);
+      activationSequenceChannel = FileChannel.open(newFile(SERVER_ACTIVATION_SEQUENCE_NAME).toPath(), READ, WRITE, CREATE);
   }
 
   @Override
-   public long readDataVersion() throws NodeManagerException {
+   public long readNodeActivationSequence() throws NodeManagerException {
      if (!isStarted()) {
         throw new NodeManagerException(new IllegalStateException("node manager must be started first"));
      }
      try {
-         useDataVersionChannel();
+         useActivationSequenceChannel();
         ByteBuffer tmpBuffer = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
-         if (dataVersionChannel.read(tmpBuffer, 0) != Long.BYTES) {
+         if (activationSequenceChannel.read(tmpBuffer, 0) != Long.BYTES) {
            return 0;
         }
         tmpBuffer.flip();
@@ -73,16 +76,17 @@ public abstract class FileBasedNodeManager extends NodeManager {
   }
 
   @Override
-   public void writeDataVersion(long version) throws NodeManagerException {
+   public void writeNodeActivationSequence(long version) throws NodeManagerException {
      if (!isStarted()) {
         throw new NodeManagerException(new IllegalStateException("node manager must be started first"));
      }
      try {
-         useDataVersionChannel();
+         useActivationSequenceChannel();
         ByteBuffer tmpBuffer = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
         tmpBuffer.putLong(0, version);
-         dataVersionChannel.write(tmpBuffer, 0);
-         dataVersionChannel.force(false);
+         activationSequenceChannel.write(tmpBuffer, 0);
+         activationSequenceChannel.force(false);
+         setNodeActivationSequence(version);
      } catch (IOException ie) {
         throw new NodeManagerException(ie);
      }
@@ -149,12 +153,8 @@ public abstract class FileBasedNodeManager extends NodeManager {
      createNodeId();
   }
 
-   /**
-    * @return
-    */
   protected final File newFile(final String fileName) {
-      File file = new File(directory, fileName);
-      return file;
+      return new File(directory, fileName);
   }
 
   protected final synchronized void createNodeId() throws IOException {
@@ -190,8 +190,8 @@ public abstract class FileBasedNodeManager extends NodeManager {
         channelCopy.close();
      } finally {
         try {
-            FileChannel dataVersionChannel = this.dataVersionChannel;
-            this.dataVersionChannel = null;
+            FileChannel dataVersionChannel = this.activationSequenceChannel;
+            this.activationSequenceChannel = null;
            if (dataVersionChannel != null) {
               dataVersionChannel.close();
            }

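The new server.activation.sequence file holds a single 8-byte big-endian long. A minimal standalone sketch of the same read/write pattern (not Artemis code; SequenceFile is an illustrative name):

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.Paths;

import static java.nio.file.StandardOpenOption.CREATE;
import static java.nio.file.StandardOpenOption.READ;
import static java.nio.file.StandardOpenOption.WRITE;

final class SequenceFile {

   private final FileChannel channel;

   SequenceFile(Path path) throws IOException {
      // CREATE (not CREATE_NEW) so a restart reuses the existing file
      channel = FileChannel.open(path, READ, WRITE, CREATE);
   }

   long read() throws IOException {
      ByteBuffer buf = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
      if (channel.read(buf, 0) != Long.BYTES) {
         return 0; // a missing or short file reads as sequence 0, as in the diff above
      }
      buf.flip();
      return buf.getLong();
   }

   void write(long sequence) throws IOException {
      ByteBuffer buf = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
      buf.putLong(0, sequence);
      channel.write(buf, 0);
      channel.force(false); // fsync the data before trusting the new sequence
   }

   public static void main(String[] args) throws IOException {
      SequenceFile file = new SequenceFile(Paths.get("server.activation.sequence"));
      file.write(file.read() + 1); // bump the sequence across restarts
      System.out.println("sequence = " + file.read());
   }
}

The switch from CREATE_NEW to CREATE is what makes the sequence survive restarts: CREATE_NEW would fail once the file already exists.
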
@@ -19,9 +19,6 @@ package org.apache.activemq.artemis.core.server.impl;
 import javax.annotation.concurrent.GuardedBy;
 
 import java.util.Objects;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.function.Consumer;
 
@@ -35,7 +32,7 @@ import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
 import org.apache.activemq.artemis.core.server.ActiveMQServer;
 import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
 import org.apache.activemq.artemis.core.server.LiveNodeLocator;
-import org.apache.activemq.artemis.core.server.NodeManager;;
+import org.apache.activemq.artemis.core.server.NodeManager;
 import org.apache.activemq.artemis.core.server.cluster.ClusterControl;
 import org.apache.activemq.artemis.core.server.cluster.ClusterController;
 import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
@@ -45,6 +42,8 @@ import org.apache.activemq.artemis.quorum.UnavailableStateException;
 import org.jboss.logging.Logger;
 
 import static org.apache.activemq.artemis.core.server.impl.ReplicationObserver.ReplicationFailure;
+import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.ensureSequentialAccessToNodeData;
+import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.tryActivate;
 
 /**
  * This activation can be used by a primary while trying to fail-back ie {@code failback == true} or
@@ -54,7 +53,6 @@ public final class ReplicationBackupActivation extends Activation implements Dis
 
    private static final Logger LOGGER = Logger.getLogger(ReplicationBackupActivation.class);
 
-   private final boolean wasLive;
    private final ReplicationBackupPolicy policy;
    private final ActiveMQServerImpl activeMQServer;
    // This field is != null iff this node is a primary during a fail-back ie acting as a backup in order to become live again.
@@ -72,10 +70,8 @@ public final class ReplicationBackupActivation extends Activation implements Dis
    private final AtomicBoolean stopping;
 
    public ReplicationBackupActivation(final ActiveMQServerImpl activeMQServer,
-                                      final boolean wasLive,
                                       final DistributedPrimitiveManager distributedManager,
                                       final ReplicationBackupPolicy policy) {
-      this.wasLive = wasLive;
       this.activeMQServer = activeMQServer;
       if (policy.isTryFailback()) {
          final SimpleString serverNodeID = activeMQServer.getNodeID();
@@ -146,14 +142,35 @@ public final class ReplicationBackupActivation extends Activation implements Dis
         }
      }
      try {
-         LOGGER.info("Trying to reach majority of quorum service nodes");
         distributedManager.start();
-         LOGGER.info("Quorum service available: starting broker");
+         final long nodeActivationSequence = activeMQServer.getNodeManager().readNodeActivationSequence();
+         // only a backup with positive local activation sequence could contain valuable data
+         if (nodeActivationSequence > 0) {
+            final String nodeId = activeMQServer.getNodeManager().getNodeId().toString();
+            DistributedLock liveLockWithInSyncReplica;
+            while (true) {
+               distributedManager.start();
+               try {
+                  liveLockWithInSyncReplica = tryActivate(nodeId, nodeActivationSequence, distributedManager, LOGGER);
+                  break;
+               } catch (UnavailableStateException canRecoverEx) {
+                  distributedManager.stop();
+               }
+            }
+            if (liveLockWithInSyncReplica != null) {
+               // retain state and start as live
+               if (!activeMQServer.initialisePart1(false)) {
+                  return;
+               }
+               activeMQServer.setState(ActiveMQServerImpl.SERVER_STATE.STARTED);
+               startAsLive(liveLockWithInSyncReplica);
+               return;
+            }
+         }
         distributedManager.addUnavailableManagerListener(this);
         // Stop the previous node manager and create a new one with NodeManager::replicatedBackup == true:
         // NodeManager::start skip setup lock file with NodeID, until NodeManager::stopBackup is called.
         activeMQServer.resetNodeManager();
-         activeMQServer.getNodeManager().stop();
         // A primary need to preserve NodeID across runs
         activeMQServer.moveServerData(policy.getMaxSavedReplicatedJournalsSize(), policy.isTryFailback());
         activeMQServer.getNodeManager().start();
@@ -164,11 +181,15 @@ public final class ReplicationBackupActivation extends Activation implements Dis
            if (closed)
               return;
         }
 
         final ClusterController clusterController = activeMQServer.getClusterManager().getClusterController();
 
+         LOGGER.infof("Apache ActiveMQ Artemis Backup Server version %s [%s] started, awaiting connection to a live cluster member to start replication", activeMQServer.getVersion().getFullVersion(),
+                      activeMQServer.toString());
+
         clusterController.awaitConnectionToReplicationCluster();
         activeMQServer.getBackupManager().start();
-         ActiveMQServerLogger.LOGGER.backupServerStarted(activeMQServer.getVersion().getFullVersion(),
-                                                         activeMQServer.getNodeManager().getNodeId());
         activeMQServer.setState(ActiveMQServerImpl.SERVER_STATE.STARTED);
         final DistributedLock liveLock = replicateAndFailover(clusterController);
         if (liveLock == null) {
@@ -192,6 +213,14 @@ public final class ReplicationBackupActivation extends Activation implements Dis
         liveLock.close();
         return;
      }
+      try {
+         ensureSequentialAccessToNodeData(activeMQServer, distributedManager, LOGGER);
+      } catch (Throwable fatal) {
+         LOGGER.warn(fatal);
+         // the policy is already the live one, but there's no activation yet: we can just stop
+         asyncRestartServer(activeMQServer, false, false);
+         throw new ActiveMQIllegalStateException("This server cannot ensure sequential access to broker data: activation failed");
+      }
      ActiveMQServerLogger.LOGGER.becomingLive(activeMQServer);
      // stopBackup is going to write the NodeID previously set on the NodeManager,
      // because activeMQServer.resetNodeManager() has created a NodeManager with replicatedBackup == true.
@@ -260,28 +289,38 @@ public final class ReplicationBackupActivation extends Activation implements Dis
            return null;
         }
         LOGGER.debugf("ReplicationFailure = %s", failure);
-         boolean voluntaryFailOver = false;
         switch (failure) {
            case VoluntaryFailOver:
-               voluntaryFailOver = true;
            case NonVoluntaryFailover:
-               final DistributedLock liveLock = tryAcquireLiveLock();
               // from now on we're meant to stop:
               // - due to failover
               // - due to restart/stop
-               assert stopping.get();
-               if (liveLock != null) {
-                  return liveLock;
+               if (!stopping.compareAndSet(false, true)) {
+                  return null;
               }
+               // no longer interested in these events: handling it manually from here
+               distributedManager.removeUnavailableManagerListener(this);
+               final long nodeActivationSequence = activeMQServer.getNodeManager().readNodeActivationSequence();
+               final String nodeId = activeMQServer.getNodeManager().getNodeId().toString();
+               DistributedLock liveLockWithInSyncReplica = null;
+               if (nodeActivationSequence > 0) {
+                  try {
+                     liveLockWithInSyncReplica = tryActivate(nodeId, nodeActivationSequence, distributedManager, LOGGER);
+                  } catch (Throwable error) {
+                     // no need to retry here, we can just restart as a backup that will run a more resilient tryActivate
+                     LOGGER.warn("Errored while attempting failover", error);
+                     liveLockWithInSyncReplica = null;
+                  }
-               boolean restart = true;
-               if (voluntaryFailOver && isFirstFailbackAttempt()) {
-                  restart = false;
-                  LOGGER.error("Failed to fail-back: stopping broker based on quorum results");
               } else {
-                  ActiveMQServerLogger.LOGGER.restartingAsBackupBasedOnQuorumVoteResults();
+                  LOGGER.warnf("We expect local activation sequence for NodeID = %s to be > 0 on a fail-over, while it is %d", nodeId, nodeActivationSequence);
               }
-               // let's ignore the stopping flag here, we're in control of it
-               asyncRestartServer(activeMQServer, restart, false);
+               assert stopping.get();
+               if (liveLockWithInSyncReplica != null) {
+                  return liveLockWithInSyncReplica;
+               }
+               ActiveMQServerLogger.LOGGER.restartingAsBackupBasedOnQuorumVoteResults();
+               // let's ignore the stopping flag here, we're already in control of it
+               asyncRestartServer(activeMQServer, true, false);
               return null;
            case RegistrationError:
               LOGGER.error("Stopping broker because of critical registration error");
@@ -307,73 +346,7 @@ public final class ReplicationBackupActivation extends Activation implements Dis
           }
        }
     } finally {
-         silentExecution("Errored on cluster topology listener for replication cleanup", () -> clusterController.removeClusterTopologyListenerForReplication(nodeLocator));
+         silentExecution("Error on cluster topology listener for replication cleanup", () -> clusterController.removeClusterTopologyListenerForReplication(nodeLocator));
-      }
-   }
-
-   /**
-    * {@code wasLive} is {@code true} only while transitioning from primary to backup.<br>
-    * If a natural born backup become live and allows failback, while transitioning to back again
-    * {@code wasLive} is still {@code false}.<br>
-    * The check on {@link ReplicationBackupPolicy#isTryFailback()} is redundant but still useful for correctness.
-    * <p>
-    * In case of fail-back, any event that's going to restart this broker as backup (eg quorum service unavailable
-    * or some replication failures) will cause {@code wasLive} to be {@code false}, because the HA policy set isn't
-    * a primary anymore.
-    */
-   private boolean isFirstFailbackAttempt() {
-      return wasLive && policy.isTryFailback();
-   }
-
-   private DistributedLock tryAcquireLiveLock() throws InterruptedException {
-      // disable quorum service unavailability handling and just treat this imperatively
-      if (!stopping.compareAndSet(false, true)) {
-         // already unavailable quorum service: fail fast
-         return null;
-      }
-      distributedManager.removeUnavailableManagerListener(this);
-      assert activeMQServer.getNodeManager().getNodeId() != null;
-      final String liveID = activeMQServer.getNodeManager().getNodeId().toString();
-      final int voteRetries = policy.getVoteRetries();
-      final long maxAttempts = voteRetries >= 0 ? (voteRetries + 1) : -1;
-      if (maxAttempts == -1) {
-         LOGGER.error("It's not safe to retry an infinite amount of time to acquire a live lock: please consider setting a vote-retries value");
-      }
-      final long voteRetryWait = policy.getVoteRetryWait();
-      final DistributedLock liveLock = getLock(distributedManager, liveID);
-      if (liveLock == null) {
-         return null;
-      }
-      for (long attempt = 0; maxAttempts >= 0 ? (attempt < maxAttempts) : true; attempt++) {
-         try {
-            if (liveLock.tryLock(voteRetryWait, TimeUnit.MILLISECONDS)) {
-               LOGGER.debugf("%s live lock acquired after %d attempts.", liveID, (attempt + 1));
-               return liveLock;
-            }
-         } catch (UnavailableStateException e) {
-            LOGGER.warnf(e, "Failed to acquire live lock %s because of unavailable quorum service: stop trying", liveID);
-            distributedManager.stop();
-            return null;
-         }
-      }
-      LOGGER.warnf("Failed to acquire live lock %s after %d tries", liveID, maxAttempts);
-      distributedManager.stop();
-      return null;
-   }
-
-   private DistributedLock getLock(final DistributedPrimitiveManager manager,
-                                   final String lockId) throws InterruptedException {
-      if (!manager.isStarted()) {
-         return null;
-      }
-      try {
-         return manager.getDistributedLock(lockId);
-      } catch (ExecutionException e) {
-         LOGGER.warnf(e, "Errored while getting lock %s", lockId);
-         return null;
-      } catch (TimeoutException te) {
-         LOGGER.warnf(te, "Timeout while getting lock %s", lockId);
-         return null;
-      }
     }
  }
 
@@ -405,7 +378,6 @@ public final class ReplicationBackupActivation extends Activation implements Dis
        return ReplicationFailure.RegistrationError;
     }
     this.replicationEndpoint = replicationEndpoint;
-      assert replicationEndpoint != null;
     try {
        return replicationObserver.awaitReplicationFailure();
     } finally {
@@ -414,11 +386,11 @@ public final class ReplicationBackupActivation extends Activation implements Dis
           closeChannelOf(replicationEndpoint);
        }
     } finally {
-         silentExecution("Errored on live control close", liveControl::close);
+         silentExecution("Error on live control close", liveControl::close);
     }
  } finally {
-      silentExecution("Errored on cluster topology listener cleanup", () -> clusterController.removeClusterTopologyListener(replicationObserver));
-      silentExecution("Errored while removing incoming interceptor for replication", () -> clusterController.removeIncomingInterceptorForReplication(replicationError));
+      silentExecution("Error on cluster topology listener cleanup", () -> clusterController.removeClusterTopologyListener(replicationObserver));
+      silentExecution("Error while removing incoming interceptor for replication", () -> clusterController.removeIncomingInterceptorForReplication(replicationError));
  }
 } finally {
    this.replicationObserver = null;
@@ -438,7 +410,7 @@ public final class ReplicationBackupActivation extends Activation implements Dis
       return;
    }
    if (replicationEndpoint.getChannel() != null) {
-      silentExecution("Errored while closing replication endpoint channel", () -> replicationEndpoint.getChannel().close());
+      silentExecution("Error while closing replication endpoint channel", () -> replicationEndpoint.getChannel().close());
       replicationEndpoint.setChannel(null);
    }
 }

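The loops added above treat UnavailableStateException as recoverable: the coordination manager is stopped, restarted, and activation is retried until the data is proven either in sync (a lock is returned) or stale (null). A compact, hypothetical restatement of that policy (not Artemis code; CoordinationManager and Lock are illustrative stand-ins):

final class ActivationRetry {

   interface Lock {
   }

   interface CoordinationManager {
      void start() throws InterruptedException;

      void stop();

      // null means the local data is stale; throwing means the quorum service is unavailable
      Lock tryActivate(String nodeId, long localSequence) throws UnavailableException, InterruptedException;
   }

   static final class UnavailableException extends Exception {
   }

   // keep (re)starting the manager until tryActivate gives a definitive answer
   static Lock activate(CoordinationManager manager, String nodeId, long localSequence) throws InterruptedException {
      while (true) {
         manager.start();
         try {
            return manager.tryActivate(nodeId, localSequence);
         } catch (UnavailableException canRecover) {
            manager.stop();
         }
      }
   }
}
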
@@ -19,12 +19,14 @@ package org.apache.activemq.artemis.core.server.impl;
 import javax.annotation.concurrent.GuardedBy;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.activemq.artemis.api.core.ActiveMQAlreadyReplicatingException;
 import org.apache.activemq.artemis.api.core.ActiveMQException;
 import org.apache.activemq.artemis.api.core.ActiveMQIllegalStateException;
 import org.apache.activemq.artemis.api.core.Pair;
 import org.apache.activemq.artemis.api.core.TransportConfiguration;
+import org.apache.activemq.artemis.core.persistence.StorageManager;
 import org.apache.activemq.artemis.core.protocol.core.Channel;
 import org.apache.activemq.artemis.core.protocol.core.ChannelHandler;
 import org.apache.activemq.artemis.core.protocol.core.CoreRemotingConnection;
@@ -46,7 +48,10 @@ import org.apache.activemq.artemis.quorum.UnavailableStateException;
 import org.apache.activemq.artemis.spi.core.remoting.Acceptor;
 import org.jboss.logging.Logger;
 
-import static org.apache.activemq.artemis.core.server.impl.ClusterTopologySearch.searchActiveLiveNodeId;
+import static org.apache.activemq.artemis.core.server.ActiveMQServer.SERVER_STATE.STARTED;
+import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.awaitNextCommittedActivationSequence;
+import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.ensureSequentialAccessToNodeData;
+import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.tryActivate;
 
 /**
  * This is going to be {@link #run()} just by natural born primary, at the first start.
@@ -55,8 +60,9 @@ import static org.apache.activemq.artemis.core.server.impl.ClusterTopologySearch
 public class ReplicationPrimaryActivation extends LiveActivation implements DistributedLock.UnavailableLockListener {
 
    private static final Logger LOGGER = Logger.getLogger(ReplicationPrimaryActivation.class);
-   private static final long DISTRIBUTED_MANAGER_START_TIMEOUT_MILLIS = 20_000;
-   private static final long BLOCKING_CALLS_TIMEOUT_MILLIS = 5_000;
+   // This is the time we expect a replica to become live from the quorum point of view,
+   // i.e. the time to execute tryActivate and ensureSequentialAccessToNodeData
+   private static final long FAILBACK_TIMEOUT_MILLIS = 4_000;
 
    private final ReplicationPrimaryPolicy policy;
 
@@ -69,7 +75,7 @@ public class ReplicationPrimaryActivation extends LiveActivation implements Dist
 
    private final DistributedPrimitiveManager distributedManager;
 
-   private volatile boolean stoppingServer;
+   private final AtomicBoolean stoppingServer;
 
    public ReplicationPrimaryActivation(final ActiveMQServerImpl activeMQServer,
                                        final DistributedPrimitiveManager distributedManager,
@@ -78,6 +84,7 @@ public class ReplicationPrimaryActivation extends LiveActivation implements Dist
       this.policy = policy;
       this.replicationLock = new Object();
       this.distributedManager = distributedManager;
+      this.stoppingServer = new AtomicBoolean();
   }
 
   /**
@@ -101,19 +108,36 @@ public class ReplicationPrimaryActivation extends LiveActivation implements Dist
   @Override
   public void run() {
      try {
-         final NodeManager nodeManager = activeMQServer.getNodeManager();
-         final String nodeId = nodeManager.readNodeId().toString();
-         final long dataVersion = nodeManager.readDataVersion();
-         final DistributedLock liveLock = searchLiveOrAcquireLiveLock(nodeId, BLOCKING_CALLS_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS);
+         // we have a common nodeId that we can share and coordinate with between peers
+         if (policy.getCoordinationId() != null) {
+            LOGGER.infof("Applying shared peer NodeID=%s to enable coordinated live activation", policy.getCoordinationId());
+            // REVISIT: this is quite clunky, also in backup activation, we just need new nodeID persisted!
+            activeMQServer.resetNodeManager();
+            activeMQServer.getNodeManager().start();
+            activeMQServer.getNodeManager().setNodeID(policy.getCoordinationId());
+            activeMQServer.getNodeManager().stopBackup();
+         }
+         final long nodeActivationSequence = activeMQServer.getNodeManager().readNodeActivationSequence();
+         final String nodeId = activeMQServer.getNodeManager().readNodeId().toString();
+         DistributedLock liveLock;
+         while (true) {
+            distributedManager.start();
+            try {
+               liveLock = tryActivate(nodeId, nodeActivationSequence, distributedManager, LOGGER);
+               break;
+            } catch (UnavailableStateException canRecoverEx) {
+               distributedManager.stop();
+            }
+         }
         if (liveLock == null) {
+            distributedManager.stop();
+            LOGGER.infof("This broker cannot become a live server with NodeID = %s: restarting as backup", nodeId);
+            activeMQServer.setHAPolicy(policy.getBackupPolicy());
            return;
         }
 
+         ensureSequentialAccessToNodeData(activeMQServer, distributedManager, LOGGER);
+
         activeMQServer.initialisePart1(false);
 
         activeMQServer.initialisePart2(false);
@@ -142,73 +166,9 @@ public class ReplicationPrimaryActivation extends LiveActivation implements Dist
        }
     }
  }
 
-   private DistributedLock searchLiveOrAcquireLiveLock(final String nodeId,
-                                                       final long blockingCallTimeout,
-                                                       final TimeUnit unit) throws ActiveMQException, InterruptedException {
-      if (policy.isCheckForLiveServer()) {
-         LOGGER.infof("Searching a live server with NodeID = %s", nodeId);
-         if (searchActiveLiveNodeId(policy.getClusterName(), nodeId, blockingCallTimeout, unit, activeMQServer.getConfiguration())) {
-            LOGGER.infof("Found a live server with NodeID = %s: restarting as backup", nodeId);
-            activeMQServer.setHAPolicy(policy.getBackupPolicy());
-            return null;
-         }
-      }
-      startDistributedPrimitiveManager();
-      return acquireDistributeLock(getDistributeLock(nodeId), blockingCallTimeout, unit);
-   }
-
-   private void startDistributedPrimitiveManager() throws InterruptedException, ActiveMQException {
-      LOGGER.infof("Trying to reach the majority of quorum nodes in %d ms.", DISTRIBUTED_MANAGER_START_TIMEOUT_MILLIS);
-      try {
-         if (distributedManager.start(DISTRIBUTED_MANAGER_START_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS)) {
-            return;
-         }
-      } catch (InterruptedException ie) {
-         throw ie;
-      } catch (Throwable t) {
-         LOGGER.debug(t);
-      }
-      assert !distributedManager.isStarted();
-      throw new ActiveMQException("Cannot reach the majority of quorum nodes");
-   }
-
-   private DistributedLock getDistributeLock(final String nodeId) throws InterruptedException, ActiveMQException {
-      try {
-         return distributedManager.getDistributedLock(nodeId);
-      } catch (Throwable t) {
-         try {
-            distributedManager.stop();
-         } catch (Throwable ignore) {
-            // don't care
-         }
-         if (t instanceof InterruptedException) {
-            throw (InterruptedException) t;
-         }
-         throw new ActiveMQException("Cannot obtain a live lock instance");
-      }
-   }
-
-   private DistributedLock acquireDistributeLock(final DistributedLock liveLock,
-                                                 final long acquireLockTimeout,
-                                                 final TimeUnit unit) throws InterruptedException, ActiveMQException {
-      try {
-         if (liveLock.tryLock(acquireLockTimeout, unit)) {
-            return liveLock;
-         }
-      } catch (UnavailableStateException e) {
-         LOGGER.debug(e);
-      }
-      try {
-         distributedManager.stop();
-      } catch (Throwable ignore) {
-         // don't care
-      }
-      throw new ActiveMQException("Failed to become live");
-   }
-
  @Override
  public ChannelHandler getActivationChannelHandler(final Channel channel, final Acceptor acceptorUsed) {
-      if (stoppingServer) {
+      if (stoppingServer.get()) {
        return null;
     }
     return packet -> {
@@ -268,7 +228,7 @@ public class ReplicationPrimaryActivation extends LiveActivation implements Dist
           awaitBackupAnnouncementOnFailbackRequest(clusterConnection);
        }
     } catch (Exception e) {
-         if (activeMQServer.getState() == ActiveMQServerImpl.SERVER_STATE.STARTED) {
+         if (activeMQServer.getState() == STARTED) {
           /*
            * The reasoning here is that the exception was either caused by (1) the
            * (interaction with) the backup, or (2) by an IO Error at the storage. If (1), we
@@ -283,11 +243,13 @@ public class ReplicationPrimaryActivation extends LiveActivation implements Dist
        ActiveMQServerLogger.LOGGER.errorStoppingReplication(amqe);
     } finally {
        synchronized (replicationLock) {
+            if (this.replicationManager == replicationManager) {
              this.replicationManager = null;
+            }
        }
     }
  }
 
  /**
   * This is handling awaiting backup announcement before trying to failover.
@@ -308,35 +270,55 @@ public class ReplicationPrimaryActivation extends LiveActivation implements Dist
     }
  }
 
-   /**
-    * If {@link #asyncStopServer()} happens before this call, the restart just won't happen.
-    * If {@link #asyncStopServer()} happens after this call, will make the server to stop right after being restarted.
-    */
  private void restartAsBackupAfterFailback() throws Exception {
-      if (stoppingServer) {
+      if (stoppingServer.get()) {
        return;
     }
-      synchronized (this) {
-         if (stoppingServer) {
+      final String coordinatedLockAndNodeId;
+      final long inSyncReplicaActivation;
+      synchronized (replicationLock) {
+         if (stoppingServer.get()) {
           return;
        }
-         distributedManager.stop();
+         final ReplicationManager replicationManager = this.replicationManager;
+         if (replicationManager == null) {
+            LOGGER.warnf("Failback interrupted");
+            // we got a disconnection from the replica *before* stopping acceptors: better not failback!
+            return;
+         }
+         // IMPORTANT: this is going to spare server::fail from issuing a replica connection failure (with failed == false),
+         // because onReplicationConnectionClose fails fast on stopping == true.
+         if (!stoppingServer.compareAndSet(false, true)) {
+            LOGGER.infof("Failback interrupted: server is already stopping");
+            return;
+         }
+         coordinatedLockAndNodeId = activeMQServer.getNodeManager().getNodeId().toString();
+         inSyncReplicaActivation = activeMQServer.getNodeManager().getNodeActivationSequence();
+         // nobody can observe a concurrent drop of the replica connection here: awaitNextCommittedActivationSequence defensively
+         // waits FAILBACK_TIMEOUT_MILLIS, then proceeds as backup and competes to become live again
        activeMQServer.fail(true);
+      }
+      try {
+         distributedManager.start();
+         if (!awaitNextCommittedActivationSequence(distributedManager, coordinatedLockAndNodeId, inSyncReplicaActivation, FAILBACK_TIMEOUT_MILLIS, LOGGER)) {
+            LOGGER.warnf("Timed out waiting for failback server activation with NodeID = %s: and sequence > %d: after %dms",
+                         coordinatedLockAndNodeId, inSyncReplicaActivation, FAILBACK_TIMEOUT_MILLIS);
+         }
+      } catch (UnavailableStateException ignored) {
+         LOGGER.debug("Unavailable distributed manager while awaiting failback activation sequence: ignored", ignored);
+      } finally {
+         distributedManager.stop();
+      }
     ActiveMQServerLogger.LOGGER.restartingReplicatedBackupAfterFailback();
     activeMQServer.setHAPolicy(policy.getBackupPolicy());
     activeMQServer.start();
  }
-   }
 
  private void asyncStopServer() {
-      if (stoppingServer) {
+      if (stoppingServer.get()) {
        return;
     }
-      synchronized (this) {
-         if (stoppingServer) {
-            return;
-         }
-         stoppingServer = true;
+      if (stoppingServer.compareAndSet(false, true)) {
        new Thread(() -> {
           try {
              activeMQServer.stop();
@@ -374,19 +356,28 @@ public class ReplicationPrimaryActivation extends LiveActivation implements Dist
  private void onReplicationConnectionClose() {
     ExecutorService executorService = activeMQServer.getThreadPool();
     if (executorService != null) {
-         synchronized (replicationLock) {
-            if (replicationManager == null) {
-               return;
-            }
+         if (stoppingServer.get()) {
+            return;
        }
        executorService.execute(() -> {
           synchronized (replicationLock) {
             if (replicationManager == null) {
                return;
             }
+               // we increment only if we are staying alive
+               if (!stoppingServer.get() && STARTED.equals(activeMQServer.getState())) {
+                  try {
+                     ensureSequentialAccessToNodeData(activeMQServer, distributedManager, LOGGER);
+                  } catch (Throwable fatal) {
+                     LOGGER.errorf(fatal, "Unexpected exception: %s on attempted activation sequence increment; stopping server async", fatal.getLocalizedMessage());
+                     asyncStopServer();
+                  }
+               }
             // this is going to stop the replication manager
-               activeMQServer.getStorageManager().stopReplication();
-               assert !replicationManager.isStarted();
+               final StorageManager storageManager = activeMQServer.getStorageManager();
+               if (storageManager != null) {
+                  storageManager.stopReplication();
+               }
             replicationManager = null;
          }
       });

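The switch from a volatile boolean guarded by synchronized blocks to an AtomicBoolean is the key concurrency change in this file: compareAndSet lets exactly one caller win the false-to-true transition, so the stop and failback paths cannot race. A minimal sketch of the idiom (not Artemis code):

import java.util.concurrent.atomic.AtomicBoolean;

final class StopOnce {

   private final AtomicBoolean stopping = new AtomicBoolean();

   void requestStop(Runnable stopAction) {
      // only the first caller observes false -> true and runs the action
      if (stopping.compareAndSet(false, true)) {
         stopAction.run();
      }
   }

   boolean isStopping() {
      return stopping.get();
   }
}
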
@@ -0,0 +1,312 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.activemq.artemis.core.server.impl.quorum;
+
+import java.util.Objects;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+import org.apache.activemq.artemis.api.core.ActiveMQException;
+import org.apache.activemq.artemis.core.server.ActiveMQServer;
+import org.apache.activemq.artemis.core.server.NodeManager;
+import org.apache.activemq.artemis.quorum.DistributedLock;
+import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
+import org.apache.activemq.artemis.quorum.MutableLong;
+import org.apache.activemq.artemis.quorum.UnavailableStateException;
+import org.jboss.logging.Logger;
+
+/**
+ * This class contains the activation sequence logic of the pluggable quorum vote:
+ * it should be used by {@link org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation}
+ * and {@link org.apache.activemq.artemis.core.server.impl.ReplicationPrimaryActivation} to coordinate
+ * for replication.
+ */
+public final class ActivationSequenceStateMachine {
+
+   private static final long CHECK_ACTIVATION_SEQUENCE_WAIT_MILLIS = 200;
+   private static final long CHECK_REPAIRED_ACTIVATION_SEQUENCE_WAIT_MILLIS = 2000;
+   private static final long LIVE_LOCK_ACQUIRE_TIMEOUT_MILLIS = 2000;
+
+   private ActivationSequenceStateMachine() {
+
+   }
+
+   /**
+    * It loops if the data of the broker is still valuable, but it cannot become live.
+    * It loops (temporarily) if data is in sync or can self-heal, but it cannot yet acquire the live lock.
+    * <p>
+    * It stops looping and returns:
+    * <p><ul>
+    * <li>{@code null}: if data is stale (and there are no rights to become live)
+    * <li>{@code !=null}: if data is in sync and the {@link DistributedLock} is correctly acquired
+    * </ul><p>
+    * <p>
+    * After successfully returning from this method (i.e. a non-null return value), a broker should use
+    * {@link #ensureSequentialAccessToNodeData(ActiveMQServer, DistributedPrimitiveManager, Logger)} to complete
+    * the activation and guarantee the initial not-replicated ownership of data.
+    */
+   public static DistributedLock tryActivate(final String nodeId,
+                                             final long nodeActivationSequence,
+                                             final DistributedPrimitiveManager distributedManager,
+                                             final Logger logger) throws InterruptedException, ExecutionException, TimeoutException, UnavailableStateException {
+      final DistributedLock activationLock = distributedManager.getDistributedLock(nodeId);
+      try (MutableLong coordinatedNodeSequence = distributedManager.getMutableLong(nodeId)) {
+         while (true) {
+            // a dirty read is sufficient to know if we are *not* an in-sync replica:
+            // typically the lock owner will increment to signal our data is stale and we are happy without any
+            // further coordination at this point
+            switch (validateActivationSequence(coordinatedNodeSequence, activationLock, nodeId, nodeActivationSequence, logger)) {
+
+               case Stale:
+                  activationLock.close();
+                  return null;
+               case SelfRepair:
+               case InSync:
+                  break;
+               case MaybeInSync:
+                  if (activationLock.tryLock()) {
+                     // BAD: where's the broker that should commit it?
+                     activationLock.unlock();
+                     logger.warnf("Cannot assume live role for NodeID = %s: the claimed activation sequence needs to be repaired",
+                                  nodeId);
+                     TimeUnit.MILLISECONDS.sleep(CHECK_REPAIRED_ACTIVATION_SEQUENCE_WAIT_MILLIS);
+                     continue;
+                  }
+                  // quick path while data is still valuable: wait until something changes (commit/repair)
+                  TimeUnit.MILLISECONDS.sleep(CHECK_ACTIVATION_SEQUENCE_WAIT_MILLIS);
+                  continue;
+            }
+            // SelfRepair, InSync
+            if (!activationLock.tryLock(LIVE_LOCK_ACQUIRE_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS)) {
+               logger.debugf("Candidate for Node ID = %s, with local activation sequence: %d, cannot acquire live lock within %dms; retrying",
+                             nodeId, nodeActivationSequence, LIVE_LOCK_ACQUIRE_TIMEOUT_MILLIS);
+               continue;
+            }
+            switch (validateActivationSequence(coordinatedNodeSequence, activationLock, nodeId, nodeActivationSequence, logger)) {
+
+               case Stale:
+                  activationLock.close();
+                  return null;
+               case SelfRepair:
+                  // Self-repair sequence, i.e. we were the only one with the most up-to-date data.
+                  // NOTE: we cannot move the sequence now, let's delay it to ensureSequentialAccessToNodeData
+                  logger.infof("Assuming live role for NodeID = %s: local activation sequence %d matches claimed coordinated activation sequence %d. Repairing sequence", nodeId, nodeActivationSequence, nodeActivationSequence);
+                  return activationLock;
+               case InSync:
+                  // we are an in-sync replica, good to go live as UNREPLICATED
+                  logger.infof("Assuming live role for NodeID = %s, local activation sequence %d matches current coordinated activation sequence %d", nodeId, nodeActivationSequence, nodeActivationSequence);
+                  return activationLock;
+               case MaybeInSync:
+                  activationLock.unlock();
+                  logger.warnf("Cannot assume live role for NodeID = %s: the claimed activation sequence needs to be repaired", nodeId);
+                  TimeUnit.MILLISECONDS.sleep(CHECK_REPAIRED_ACTIVATION_SEQUENCE_WAIT_MILLIS);
+                  continue;
+            }
+         }
+      }
+   }
+
+   private enum ValidationResult {
+      /**
+       * The coordinated activation sequence (claimed/committed) is far beyond the local one: data is not valuable anymore
+       **/
+      Stale,
+      /**
+       * The coordinated activation sequence is the same as the local one: data is in sync
+       **/
+      InSync,
+      /**
+       * The next coordinated activation sequence is not committed yet: maybe data is in sync
+       **/
+      MaybeInSync,
+      /**
+       * The next coordinated activation sequence is not committed yet, but this broker can self-repair: data is in sync
+       **/
+      SelfRepair
+   }
+
+   private static ValidationResult validateActivationSequence(final MutableLong coordinatedNodeSequence,
+                                                              final DistributedLock activationLock,
+                                                              final String lockAndLongId,
+                                                              final long nodeActivationSequence,
+                                                              final Logger logger) throws UnavailableStateException {
+      assert coordinatedNodeSequence.getMutableLongId().equals(lockAndLongId);
+      assert activationLock.getLockId().equals(lockAndLongId);
+      final long currentCoordinatedNodeSequence = coordinatedNodeSequence.get();
+      if (nodeActivationSequence == currentCoordinatedNodeSequence) {
+         return ValidationResult.InSync;
+      }
+      if (currentCoordinatedNodeSequence > 0) {
+         logger.infof("Not a candidate for NodeID = %s activation, local activation sequence %d does not match coordinated activation sequence %d",
+                      lockAndLongId, nodeActivationSequence, currentCoordinatedNodeSequence);
+         return ValidationResult.Stale;
+      }
+      // claimed activation sequence
+      final long claimedCoordinatedNodeSequence = -currentCoordinatedNodeSequence;
+      final long sequenceGap = claimedCoordinatedNodeSequence - nodeActivationSequence;
+      if (sequenceGap == 0) {
+         return ValidationResult.SelfRepair;
+      }
+      if (sequenceGap == 1) {
+         // maybe data is still valuable
+         return ValidationResult.MaybeInSync;
+      }
+      assert sequenceGap > 1;
+      // the sequence has moved so much that data is no longer valuable
+      logger.infof("Not a candidate for NodeID = %s activation, local activation sequence %d does not match coordinated activation sequence %d",
+                   lockAndLongId, nodeActivationSequence, claimedCoordinatedNodeSequence);
+      return ValidationResult.Stale;
+   }
+
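The sign convention above carries the whole protocol: a positive coordinated value is a committed activation sequence, a negative value is a claimed-but-uncommitted one. A runnable walk-through of the same classification rules (not Artemis code):

final class SequenceClassifier {

   enum Result { STALE, IN_SYNC, MAYBE_IN_SYNC, SELF_REPAIR }

   static Result classify(long local, long coordinated) {
      if (local == coordinated) {
         return Result.IN_SYNC;            // committed and equal to ours
      }
      if (coordinated > 0) {
         return Result.STALE;              // someone committed a different sequence
      }
      final long claimed = -coordinated;   // decode the claimed sequence
      final long gap = claimed - local;
      if (gap == 0) {
         return Result.SELF_REPAIR;        // our own failed claim: we may repair it
      }
      if (gap == 1) {
         return Result.MAYBE_IN_SYNC;      // a claim is in flight; wait for commit/repair
      }
      return Result.STALE;                 // the claim moved too far ahead of our data
   }

   public static void main(String[] args) {
      System.out.println(classify(5, 5));  // IN_SYNC
      System.out.println(classify(5, 6));  // STALE
      System.out.println(classify(5, -5)); // SELF_REPAIR
      System.out.println(classify(5, -6)); // MAYBE_IN_SYNC
      System.out.println(classify(5, -8)); // STALE
   }
}

Only a committed value equal to the local one, or a claim that this broker itself left behind, allows activation to proceed.
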
+   /**
+    * It waits until {@code timeoutMills} ms have passed or the coordinated activation sequence has progressed enough
+    */
+   public static boolean awaitNextCommittedActivationSequence(final DistributedPrimitiveManager distributedManager,
+                                                              final String coordinatedLockAndNodeId,
+                                                              final long activationSequence,
+                                                              final long timeoutMills,
+                                                              final Logger logger)
+      throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
+      Objects.requireNonNull(distributedManager);
+      Objects.requireNonNull(logger);
+      Objects.requireNonNull(coordinatedLockAndNodeId);
+      if (activationSequence < 0) {
+         throw new IllegalArgumentException("activationSequence must be >= 0, while it is " + activationSequence);
+      }
+      if (!distributedManager.isStarted()) {
+         throw new IllegalStateException("manager must be started");
+      }
+      final MutableLong coordinatedActivationSequence = distributedManager.getMutableLong(coordinatedLockAndNodeId);
+      // wait for the live to activate and run unreplicated with a sequence > inSyncReplicaActivation;
+      // this read can be dirty b/c we are just looking for an increment.
+      boolean anyNext = false;
+      final long timeoutNs = TimeUnit.MILLISECONDS.toNanos(timeoutMills);
+      final long started = System.nanoTime();
+      long elapsedNs;
+      do {
+         final long coordinatedValue = coordinatedActivationSequence.get();
+         if (coordinatedValue > activationSequence) {
+            // all good, some activation has gone ahead
+            logger.infof("Detected a new activation sequence with NodeID = %s: and sequence: %d", coordinatedLockAndNodeId, coordinatedValue);
+            anyNext = true;
+            break;
+         }
+         if (coordinatedValue < 0) {
+            // an uncommitted claim
+            final long claimedSequence = -coordinatedValue;
+            final long activationsGap = claimedSequence - activationSequence;
+            if (activationsGap > 1) {
+               // all good, some activation has gone ahead
+               logger.infof("Detected further sequential server activations from sequence %d, with NodeID = %s: and claimed sequence: %d", activationSequence, coordinatedLockAndNodeId, claimedSequence);
+               anyNext = true;
+               break;
+            }
+            // activation is still in progress
+            logger.debugf("Detected claiming of activation sequence = %d for NodeID = %s", claimedSequence, coordinatedLockAndNodeId);
+         }
+         try {
+            TimeUnit.MILLISECONDS.sleep(CHECK_ACTIVATION_SEQUENCE_WAIT_MILLIS);
+         } catch (InterruptedException ignored) {
+         }
+         elapsedNs = System.nanoTime() - started;
+      }
+      while (elapsedNs < timeoutNs);
+      return anyNext;
+   }
+
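The polling loop above declares progress in exactly two cases: a committed value beyond the expected sequence, or a claim at least two ahead (a claim of exactly expected + 1 is an activation still in flight). Restated as a tiny predicate (not Artemis code):

final class ProgressCheck {

   static boolean hasProgressed(long expected, long coordinated) {
      if (coordinated > expected) {
         return true;                      // a newer sequence was committed
      }
      if (coordinated < 0) {
         final long claimed = -coordinated;
         return claimed - expected > 1;    // a further activation has gone ahead
      }
      return false;                        // unchanged, or a claim of expected + 1 still in flight
   }

   public static void main(String[] args) {
      System.out.println(hasProgressed(3, 4));  // true: committed past us
      System.out.println(hasProgressed(3, -4)); // false: claim in progress
      System.out.println(hasProgressed(3, -5)); // true: further activations happened
   }
}
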
|
/**
|
||||||
|
* This is going to increment the coordinated activation sequence while holding the live lock, failing with some exception otherwise.<br>
|
||||||
|
* <p>
|
||||||
|
* The acceptable states are {@link ValidationResult#InSync} and {@link ValidationResult#SelfRepair}, throwing some exception otherwise.
|
||||||
|
* <p>
|
||||||
|
* This must be used while holding a live lock to ensure not-exclusive ownership of data ie can be both used
|
||||||
|
* while loosing connectivity with a replica or after successfully {@link #tryActivate(String, long, DistributedPrimitiveManager, Logger)}.
|
||||||
|
*/
|
||||||
|
public static void ensureSequentialAccessToNodeData(ActiveMQServer activeMQServer,
|
||||||
|
DistributedPrimitiveManager distributedPrimitiveManager,
|
||||||
|
                                                           final Logger logger) throws ActiveMQException, InterruptedException, UnavailableStateException, ExecutionException, TimeoutException {
      final NodeManager nodeManager = activeMQServer.getNodeManager();
      final String lockAndLongId = nodeManager.getNodeId().toString();
      final DistributedLock liveLock = distributedPrimitiveManager.getDistributedLock(lockAndLongId);
      if (!liveLock.isHeldByCaller()) {
         final String message = String.format("Server [%s], live lock for NodeID = %s, not held, activation sequence cannot be safely changed",
                                              activeMQServer, lockAndLongId);
         logger.info(message);
         throw new UnavailableStateException(message);
      }
      final long nodeActivationSequence = nodeManager.readNodeActivationSequence();
      final MutableLong coordinatedNodeActivationSequence = distributedPrimitiveManager.getMutableLong(lockAndLongId);
      final long currentCoordinatedActivationSequence = coordinatedNodeActivationSequence.get();
      final long nextActivationSequence;
      if (currentCoordinatedActivationSequence < 0) {
         // check self-repair: a negative coordinated value is an uncommitted claim
         if (nodeActivationSequence != -currentCoordinatedActivationSequence) {
            final String message = String.format("Server [%s], cannot assume live role for NodeID = %s, local activation sequence %d does not match current claimed coordinated sequence %d: need repair",
                                                 activeMQServer, lockAndLongId, nodeActivationSequence, -currentCoordinatedActivationSequence);
            logger.info(message);
            throw new ActiveMQException(message);
         }
         // auto-repair: this is the same server that failed to commit its claimed sequence
         nextActivationSequence = nodeActivationSequence;
      } else {
         // check in-sync
         if (nodeActivationSequence != currentCoordinatedActivationSequence) {
            final String message = String.format("Server [%s], cannot assume live role for NodeID = %s, local activation sequence %d does not match current coordinated sequence %d",
                                                 activeMQServer, lockAndLongId, nodeActivationSequence, currentCoordinatedActivationSequence);
            logger.info(message);
            throw new ActiveMQException(message);
         }
         nextActivationSequence = nodeActivationSequence + 1;
      }
      // UN_REPLICATED STATE ENTER: auto-repair doesn't need to claim and write locally
      if (nodeActivationSequence != nextActivationSequence) {
         // claim
         if (!coordinatedNodeActivationSequence.compareAndSet(nodeActivationSequence, -nextActivationSequence)) {
            final String message = String.format("Server [%s], cannot assume live role for NodeID = %s, activation sequence claim failed, local activation sequence %d no longer matches current coordinated sequence %d",
                                                 activeMQServer, lockAndLongId, nodeActivationSequence, coordinatedNodeActivationSequence.get());
            logger.info(message);
            throw new ActiveMQException(message);
         }
         // claim success: write locally
         try {
            nodeManager.writeNodeActivationSequence(nextActivationSequence);
         } catch (NodeManager.NodeManagerException fatal) {
            logger.errorf("Server [%s] failed to set local activation sequence to: %d for NodeId = %s. Cannot continue committing coordinated activation sequence: REQUIRES ADMIN INTERVENTION",
                          activeMQServer, nextActivationSequence, lockAndLongId);
            throw new UnavailableStateException(fatal);
         }
         logger.infof("Server [%s], incremented local activation sequence to: %d for NodeId = %s",
                      activeMQServer, nextActivationSequence, lockAndLongId);
      } else {
         // self-heal: update the in-memory sequence, because no write will do it
         nodeManager.setNodeActivationSequence(nextActivationSequence);
      }
      // commit
      if (!coordinatedNodeActivationSequence.compareAndSet(-nextActivationSequence, nextActivationSequence)) {
         final String message = String.format("Server [%s], cannot assume live role for NodeID = %s, activation sequence commit failed, local activation sequence %d no longer matches current coordinated sequence %d",
                                              activeMQServer, lockAndLongId, nodeActivationSequence, coordinatedNodeActivationSequence.get());
         logger.info(message);
         throw new ActiveMQException(message);
      }
      logger.infof("Server [%s], incremented coordinated activation sequence to: %d for NodeId = %s",
                   activeMQServer, nextActivationSequence, lockAndLongId);
   }
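The claim/commit encoding above is easier to follow as a standalone walk-through. The sketch below (illustrative only, not Artemis code) simulates the same protocol with a plain `AtomicLong` standing in for the coordinated `MutableLong`:

```java
import java.util.concurrent.atomic.AtomicLong;

// Simulates the activation sequence protocol: a negative coordinated value
// marks an in-flight claim (claimed but not yet committed).
public class ActivationSequenceSketch {

   public static void main(String[] args) {
      AtomicLong coordinated = new AtomicLong(5); // committed coordinated sequence
      long local = 5;                             // local journal sequence, in sync

      long next = local + 1;
      // 1. claim: publish -(next), so a crash here is detectable as "claimed, not committed"
      if (!coordinated.compareAndSet(local, -next)) {
         throw new IllegalStateException("another broker moved the sequence first");
      }
      // 2. write locally: the local journal now records the new sequence
      local = next;
      // 3. commit: flip the claim to the positive, committed value
      if (!coordinated.compareAndSet(-next, next)) {
         throw new IllegalStateException("claim was lost before commit");
      }
      System.out.println("coordinated = " + coordinated.get() + ", local = " + local);
      // If the broker dies between steps 2 and 3, on restart it finds coordinated == -6
      // and local == 6: the self-repair branch above re-commits 6 without incrementing again.
   }
}
```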
@ -244,8 +244,10 @@ public class ManagementServiceImpl implements ManagementService {
          ObjectName objectName = objectNameBuilder.getActiveMQServerObjectName();
          unregisterFromJMX(objectName);
          unregisterFromRegistry(ResourceNames.BROKER);
-         unregisterMeters(ResourceNames.BROKER + "." + messagingServer.getConfiguration().getName());
+         if (messagingServer != null) {
+            unregisterMeters(ResourceNames.BROKER + "." + messagingServer.getConfiguration().getName());
+         }
       }

    @Override
    public void registerAddress(AddressInfo addressInfo) throws Exception {

@ -3189,13 +3189,12 @@
          </xsd:documentation>
       </xsd:annotation>
    </xsd:element>
-   <xsd:element name="check-for-live-server" type="xsd:boolean" default="false" maxOccurs="1" minOccurs="0">
+   <xsd:element name="coordination-id" type="xsd:string" maxOccurs="1" minOccurs="0">
       <xsd:annotation>
          <xsd:documentation>
-            Whether to check the cluster for a (live) server using our own server ID when starting
-            up. This option is only necessary for performing 'fail-back' on replicating
-            servers. Strictly speaking this setting only applies to live servers and not to
-            backups.
+            The common identity to use for coordination that is shared across instances that will replicate.
+            The value will be used as the internal server nodeId and as the identity of entities in the
+            distributed-primitive-manager.
          </xsd:documentation>
       </xsd:annotation>
    </xsd:element>

@ -3208,21 +3207,6 @@
          </xsd:documentation>
       </xsd:annotation>
    </xsd:element>
-   <xsd:element name="vote-retries" type="xsd:integer" default="12" minOccurs="0" maxOccurs="1">
-      <xsd:annotation>
-         <xsd:documentation>
-            If we start as a replica and lose connection to the master, how many times should we attempt to vote
-            for quorum before restarting
-         </xsd:documentation>
-      </xsd:annotation>
-   </xsd:element>
-   <xsd:element name="vote-retry-wait" type="xsd:long" default="2000" minOccurs="0" maxOccurs="1">
-      <xsd:annotation>
-         <xsd:documentation>
-            How long to wait (in milliseconds) between each vote
-         </xsd:documentation>
-      </xsd:annotation>
-   </xsd:element>
    <xsd:element name="retry-replication-wait" type="xsd:long" default="2000" minOccurs="0" maxOccurs="1">
       <xsd:annotation>
          <xsd:documentation>

@ -3288,20 +3272,6 @@
          </xsd:documentation>
       </xsd:annotation>
    </xsd:element>
-   <xsd:element name="vote-retries" type="xsd:integer" default="12" minOccurs="0" maxOccurs="1">
-      <xsd:annotation>
-         <xsd:documentation>
-            If we lose connection to the master, how many times should we attempt to vote for quorum before restarting
-         </xsd:documentation>
-      </xsd:annotation>
-   </xsd:element>
-   <xsd:element name="vote-retry-wait" type="xsd:long" default="2000" minOccurs="0" maxOccurs="1">
-      <xsd:annotation>
-         <xsd:documentation>
-            How long to wait (in milliseconds) between each vote
-         </xsd:documentation>
-      </xsd:annotation>
-   </xsd:element>
    <xsd:element name="retry-replication-wait" type="xsd:long" default="2000" minOccurs="0" maxOccurs="1">
       <xsd:annotation>
          <xsd:documentation>

@ -283,7 +283,6 @@ public class HAPolicyConfigurationTest extends ActiveMQTestBase {
       assertFalse(policy.canScaleDown());
       assertFalse(policy.isBackup());
       assertFalse(policy.isSharedStore());
-      assertTrue(policy.isCheckForLiveServer());
       assertTrue(policy.isWaitForActivation());
       assertEquals("purple", policy.getGroupName());
       assertEquals("purple", policy.getBackupGroupName());

@ -297,8 +296,6 @@ public class HAPolicyConfigurationTest extends ActiveMQTestBase {
       assertEquals(policy.getBackupGroupName(), failbackPolicy.getBackupGroupName());
       assertEquals(policy.getClusterName(), failbackPolicy.getClusterName());
       assertEquals(failbackPolicy.getMaxSavedReplicatedJournalsSize(), ActiveMQDefaultConfiguration.getDefaultMaxSavedReplicatedJournalsSize());
-      assertEquals(1, failbackPolicy.getVoteRetries());
-      assertEquals(1000, failbackPolicy.getVoteRetryWait());
       assertTrue(failbackPolicy.isTryFailback());
       assertTrue(failbackPolicy.isBackup());
       assertFalse(failbackPolicy.isSharedStore());

@ -337,8 +334,6 @@ public class HAPolicyConfigurationTest extends ActiveMQTestBase {
       assertEquals("tiddles", policy.getBackupGroupName());
       assertEquals("33rrrrr", policy.getClusterName());
       assertEquals(22, policy.getMaxSavedReplicatedJournalsSize());
-      assertEquals(1, policy.getVoteRetries());
-      assertEquals(1000, policy.getVoteRetryWait());
       assertFalse(policy.isTryFailback());
       assertTrue(policy.isBackup());
       assertFalse(policy.isSharedStore());

@ -358,7 +353,6 @@ public class HAPolicyConfigurationTest extends ActiveMQTestBase {
       assertFalse(failoverLivePolicy.canScaleDown());
       assertFalse(failoverLivePolicy.isBackup());
       assertFalse(failoverLivePolicy.isSharedStore());
-      assertFalse(failoverLivePolicy.isCheckForLiveServer());
       assertTrue(failoverLivePolicy.isWaitForActivation());
       assertEquals(policy.getGroupName(), failoverLivePolicy.getGroupName());
       assertEquals(policy.getClusterName(), failoverLivePolicy.getClusterName());

@ -30,8 +30,6 @@
         <cluster-name>33rrrrr</cluster-name>
         <initial-replication-sync-timeout>9876</initial-replication-sync-timeout>
         <retry-replication-wait>12345</retry-replication-wait>
-        <vote-retries>1</vote-retries>
-        <vote-retry-wait>1000</vote-retry-wait>
         <allow-failback>false</allow-failback>
         <manager>
            <class-name>

@ -27,9 +27,6 @@
         <cluster-name>abcdefg</cluster-name>
         <initial-replication-sync-timeout>9876</initial-replication-sync-timeout>
         <retry-replication-wait>12345</retry-replication-wait>
-        <check-for-live-server>true</check-for-live-server>
-        <vote-retries>1</vote-retries>
-        <vote-retry-wait>1000</vote-retry-wait>
         <manager>
            <class-name>
               org.apache.activemq.artemis.core.config.impl.HAPolicyConfigurationTest$FakeDistributedPrimitiveManager

@ -65,8 +65,8 @@ which we will cover in a later chapter.
 > message data will not be available after failover.

 The `ha-policy` type configures which strategy a cluster should use to
-provide the backing up of a servers data. Within this configuration
-element is configured how a server should behave within the cluster,
+provide the backing up of a server's data. Within this configuration
+element we configure how a server should behave within the cluster,
 either as a master (live), slave (backup) or colocated (both live and
 backup). This would look something like:

@ -98,7 +98,7 @@ or
 </ha-policy>
 ```

-*Replication* allows too to configure 2 new roles to enable *pluggable quorum* provider configuration, by using:
+*Replication* allows the configuration of two new roles to enable *pluggable quorum* provider configuration, by using:
 ```xml
 <ha-policy>
    <replication>

@ -116,17 +116,14 @@ to configure the classic *master* role, and
 ```
 for the classic *slave* one.

-If *replication* is configured using such new roles some additional element is required to complete configuration, detailed later.
+If *replication* is configured using these new roles, some additional elements are required to complete the configuration, as detailed later.

 ### IMPORTANT NOTE ON PLUGGABLE QUORUM VOTE FEATURE

-This feature is still **EXPERIMENTAL** and not meant to be run in production yet.
+This feature is still **EXPERIMENTAL**. Extra testing should be done before running this feature in production. Please report any issues you find to the ActiveMQ Artemis mailing lists.

 It means:
-- its configuration can change until declared as **officially stable**
-- it has to solve yet an inherent data misalignment issue with replication (it can happen with `classic` replication as well)
-
-More info about this issue are on [ARTEMIS-3340](https://issues.apache.org/jira/browse/ARTEMIS-3340).
+- its configuration can change until declared as **officially stable**

 ### Data Replication

@ -226,12 +223,12 @@ changes and repeats the process.
 > live server by changing `slave` to `master`.

 Much like in the shared-store case, when the live server stops or
 crashes, its replicating backup will become active and take over its
 duties. Specifically, the backup will become active when it loses
-connection to its live server. This can be problematic because this can
-also happen because of a temporary network problem.
+connection to its live server. This can be problematic because it can
+also happen as the result of a temporary network problem.

-This issue is solved in 2 different ways depending on which replication roles are configured:
+The issue can be solved in two different ways, depending on which replication roles are configured:
 - **classic replication** (`master`/`slave` roles): backup will try to determine whether it still can
 connect to the other servers in the cluster. If it can connect to more
 than half the servers, it will become active, if more than half the

@ -275,7 +272,7 @@ The backup server must be similarly configured but as a `slave`
 </ha-policy>
 ```

-To configure a pluggable quorum replication's primary and backup instead:
+To configure a pluggable quorum replication's primary and backup, use:

 ```xml
 <ha-policy>

@ -391,7 +388,6 @@ For `primary`:
          <property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
       </properties>
    </manager>
-   <check-for-live-server>true</check-for-live-server>
 </primary>
 </replication>
 </ha-policy>

@ -418,16 +414,16 @@ The configuration of `class-name` as follows
 ```

 isn't really needed, because Apache Curator is the default provider, but has been shown for completeness.

-The `properties` element, instead
+The `properties` element:
 ```xml
 <properties>
    <property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
 </properties>
 ```
-Can specify a list of `property` elements in the form of key-value pairs, depending the ones
-accepted by the specified `class-name` provider.
+can specify a list of `property` elements in the form of key-value pairs, appropriate to what is
+supported by the specified `class-name` provider.

-Apache Curator's provider allow to configure these properties:
+Apache Curator's provider allows the following properties:

 - [`connect-string`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#connectString(java.lang.String)): (no default)
 - [`session-ms`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#sessionTimeoutMs(int)): (default is 18000 ms)

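As a rough illustration of how such key-value pairs reach a provider, the sketch below wires them up programmatically. The `DistributedPrimitiveManager.newInstanceOf(String, Map)` factory and the lock calls are assumptions inferred from the code in this commit, not documented usage; treat the exact signatures as a sketch:

```java
import java.util.Collections;
import java.util.Map;

import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;

public class ManagerPropertiesSketch {

   public static void main(String[] args) throws Exception {
      // the same key-value pairs as the <properties> element above
      Map<String, String> properties =
         Collections.singletonMap("connect-string", "127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668");

      // assumed factory: instantiates the provider named by class-name and hands it the properties
      DistributedPrimitiveManager manager = DistributedPrimitiveManager.newInstanceOf(
         "org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager", properties);

      manager.start();
      DistributedLock liveLock = manager.getDistributedLock("some-node-id");
      try {
         if (liveLock.tryLock()) {
            // this process now holds the coordinated live lock for "some-node-id"
         }
      } finally {
         liveLock.close();
         manager.stop();
      }
   }
}
```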
@ -438,36 +434,25 @@ Apache Curator's provider allow to configure these properties:
 - [`retries-ms`](https://curator.apache.org/apidocs/org/apache/curator/retry/RetryNTimes.html#%3Cinit%3E(int,int)): (default is 1000 ms)
 - [`namespace`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#namespace(java.lang.String)): (no default)

-Configuration of the [Apache Zookeeper](https://zookeeper.apache.org/) nodes is left to the user, but there are few
+Configuration of the [Apache Zookeeper](https://zookeeper.apache.org/) ensemble is the responsibility of the user, but there are a few
 **suggestions to improve the reliability of the quorum service**:
 - broker `session_ms` must be `>= 2 * server tick time` and `<= 20 * server tick time` as by
 [Zookeeper 3.6.3 admin guide](https://zookeeper.apache.org/doc/r3.6.3/zookeeperAdmin.html): it directly impacts how fast a backup
 can failover to an isolated/killed/unresponsive live; the higher, the slower.
 - GC on broker machine should allow keeping GC pauses within 1/3 of `session_ms` in order to let the Zookeeper heartbeat protocol
-to work reliably: if it's not possible, better increase `session_ms` accepting a slower failover
+work reliably. If that is not possible, it is better to increase `session_ms`, accepting a slower failover.
 - Zookeeper must have enough resources to keep GC (and OS) pauses much smaller than server tick time: please consider carefully if
 broker and Zookeeper node should share the same physical machine, depending on the expected load of the broker
 - network isolation protection requires configuring >=3 Zookeeper nodes

 #### *Important*: Notes on pluggable quorum replication configuration

-The first `classic` replication configuration that won't apply to the pluggable quorum replication
-is `vote-on-replication-failure` and configure it produces a startup error: pluggable quorum replication
-always behave like `vote-on-replication-failure` `true` ie shutting down a live broker (and its JVM) in case of quorum loss.
-
-The second deprecated `classic` replication configuration is `quorum-vote-wait`: given that the pluggable quorum vote replication
-requires backup to have an always-on reliable quorum service, there's no need to specify the timeout to reach
-the majority of quorum nodes. A backup remains inactive (ie JVM still up, console too, unable to sync with live, to failover etc etc)
-until the majority of quorum nodes is reachable again, re-activating if happens.
-
-The only exception is with primary failing-back to an existing live backup using `<allow-failback>true</allow-failback>`:
-if the quorum service isn't immediately available the primary (and its JVM) just stop, allowing fail-fast failing-back.
-
-There are few *semantic differences* of other existing properties:
-- `vote-retry-wait`: in `classic` replication means how long to wait between each quorum vote try, while with pluggable quorum replication
-means how long request to failover for each attempt
-- `vote-retries`: differently from `classic`, the amount of vote attempt is `1 + vote-retries` (with classic is just `vote-retries`).
-Setting `0` means no retries, leaving backup to still perform an initial attempt.
+Some `classic` replication configurations are no longer needed:
+- `vote-on-replication-failure`
+- `quorum-vote-wait`
+- `vote-retries`
+- `vote-retry-wait`
+- `check-for-live-server`

 **Notes on replication configuration with [Apache curator](https://curator.apache.org/) quorum provider**

@ -479,34 +464,34 @@ For the former case (session expiration with live no longer present), the backup
 1. cluster connection PINGs (affected by [connection-ttl](connection-ttl.md) tuning)
 2. closed TCP connection notification (depends by TCP configuration and networking stack/topology)

-These 2 cases have 2 different failover duration depending on different factors:
-1. `connection-ttl` affect how much time of the expiring `session-ms` is used to just detect a missing live broker: the higher `connection-tt`,
-the slower it reacts; backup can attempt to failover for the remaining `session-ms - connection-ttl`
-2. `session-ms` expiration is immediately detected: backup must try to failover for >=`session-ms` to be sure to catch
-the session expiration and complete failover
-
-The previous comments are meant to suggest to the careful reader that the minimum time to attempt to failover
-cannot be below the full `session-ms` expires.
-In short, it means
-```
-total failover attempt time > session-ms
-```
-with
-```
-total failover attempt time = vote-retry-wait * (vote-retries + 1)
-```
-and by consequence:
-```
-vote-retry-wait * (vote-retries + 1) > session-ms
-```
-For example with `session-ms = 18000 ms`, safe values for failover timeout are:
-```xml
-<vote-retries>11</vote-retries>
-<vote-retry-wait>2000</vote-retry-wait>
-```
-Because `11 * 2000 = 22000 ms` that's bigger then `18000 ms`.
-
-There's no risk that a backup broker will early stop attempting to failover, losing its chance to become live.
+The suggestion is to tune `connection-ttl` low enough to attempt failover as soon as possible, while taking into consideration that
+the whole fail-over duration cannot last less than the configured `session-ms`.
+
+##### Peer or Multi Primary
+With coordination delegated to the quorum service, roles are less important. It is possible to have two peer servers compete
+for activation; the winner activating as live, the loser taking up a backup role. On restart, any peer server
+with the most up-to-date journal can activate.
+The instances need to know in advance what identity they will coordinate on.
+In the replication `primary` ha policy we can explicitly set the `coordination-id` to a common value for all peers in a cluster.
+
+For `multi primary`:
+
+```xml
+<ha-policy>
+   <replication>
+      <primary>
+         <manager>
+            <class-name>org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager</class-name>
+            <properties>
+               <property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
+            </properties>
+         </manager>
+         <coordination-id>peer-journal-001</coordination-id>
+      </primary>
+   </replication>
+</ha-policy>
+```
+Note: the string value provided will be converted internally into a 16-byte UUID, so it may not be immediately recognisable or human-readable;
+however, it will ensure that all peers coordinate.
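The note above says the `coordination-id` string is converted into a 16-byte UUID. The sketch below shows one plausible deterministic derivation; the exact padding/truncation scheme Artemis applies is an assumption here — the point is only that every peer configured with the same string computes the same identity:

```java
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

// Illustrative only: derive a 16-byte, UUID-shaped identity from a coordination-id string.
public class CoordinationIdSketch {

   static byte[] toNodeId(String coordinationId) {
      byte[] raw = coordinationId.getBytes(StandardCharsets.UTF_8);
      return Arrays.copyOf(raw, 16); // truncate, or zero-pad, to exactly 16 bytes
   }

   public static void main(String[] args) {
      // two peers configured with the same coordination-id derive identical bytes
      System.out.println(Arrays.equals(toNodeId("peer-journal-001"), toNodeId("peer-journal-001"))); // true
   }
}
```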

 ### Shared Store

@ -637,10 +622,10 @@ another server using its nodeID. If it finds one, it will contact this
 server and try to "fail-back". Since this is a remote replication
 scenario, the "starting live" will have to synchronize its data with the
 server running with its ID, once they are in sync, it will request the
-other server (which it assumes it is a back that has assumed its duties)
-to shutdown for it to take over. This is necessary because otherwise the
+other server (which it assumes is a backup that has assumed its duties)
+to shut down, for it to take over. This is necessary because otherwise the
 live server has no means to know whether there was a fail-over or not,
-and if there was if the server that took its duties is still running or
+and if there was, if the server that took its duties is still running or
 not. To configure this option at your `broker.xml`
 configuration file as follows, for classic replication:

@ -663,21 +648,21 @@ And pluggable quorum replication:
          <!-- some meaningful configuration -->
       </manager>
       <primary>
-         <check-for-live-server>true</check-for-live-server>
+         <!-- no need to check-for-live-server anymore -->
       </primary>
    </replication>
 </ha-policy>
 ```

 The key difference from classic replication is that if `master` cannot reach any
-live server with its same nodeID, it's going straight to become live, while `primary`
-request it to the quorum provider, searching again for any existing live if
-the quorum provider is not available (eg connectivity loss, consensus absence) or
-if there's another live broker with the same nodeID alive, in an endless loop.
+live server with its nodeID, it activates unilaterally.
+With `primary`, the responsibilities of coordination are delegated to the quorum provider:
+there are no unilateral decisions. The `primary` will only activate when
+it knows that it has the most up-to-date version of the journal identified by its nodeID.

 In short: a started `primary` cannot become live without consensus.

-> **Warning**
+> **Warning for classic replication**
 >
 > Be aware that if you restart a live server after failover has
 > occurred then `check-for-live-server` must be set to `true`. If not the live server

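To make the contrast concrete, here is a small pseudo-implementation of the two activation policies just described; the types and method names are invented for illustration and are not Artemis API:

```java
// Invented types, for illustration only.
interface Quorum {
   // blocks until the quorum service confirms this nodeID's journal is the most
   // up to date and grants the live role; may wait indefinitely without consensus
   void awaitLiveRole(String nodeId) throws InterruptedException;
}

final class ActivationContrastSketch {

   // classic master: no coordinator, so not finding a live with our nodeID is enough to activate
   static boolean classicMasterCanActivate(boolean foundLiveWithSameNodeId) {
      return !foundLiveWithSameNodeId;
   }

   // pluggable quorum primary: never unilateral, activation always waits for consensus
   static void primaryActivate(Quorum quorum, String nodeId) throws InterruptedException {
      quorum.awaitLiveRole(nodeId);
   }
}
```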
@ -62,6 +62,7 @@ under the License.
         <module>scale-down</module>
         <module>stop-server-failover</module>
         <module>transaction-failover</module>
+        <module>zookeeper-single-pair-failback</module>
      </modules>
   </profile>
   <profile>

@ -81,6 +82,7 @@ under the License.
         <module>replicated-transaction-failover</module>
         <module>scale-down</module>
         <module>transaction-failover</module>
+        <module>zookeeper-single-pair-failback</module>
      </modules>
   </profile>
 </profiles>

@ -0,0 +1,110 @@
<?xml version='1.0'?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
   <modelVersion>4.0.0</modelVersion>

   <parent>
      <groupId>org.apache.activemq.examples.failover</groupId>
      <artifactId>broker-failover</artifactId>
      <version>2.18.0-SNAPSHOT</version>
   </parent>

   <artifactId>zookeeper-single-pair-ordered-failback</artifactId>
   <packaging>jar</packaging>
   <name>ActiveMQ Artemis Zookeeper Single Pair Ordered Failback Example</name>

   <properties>
      <activemq.basedir>${project.basedir}/../../../..</activemq.basedir>
   </properties>

   <dependencies>
      <dependency>
         <groupId>org.apache.activemq</groupId>
         <artifactId>artemis-cli</artifactId>
         <version>${project.version}</version>
      </dependency>
      <dependency>
         <groupId>jakarta.jms</groupId>
         <artifactId>jakarta.jms-api</artifactId>
      </dependency>
   </dependencies>

   <build>
      <plugins>
         <plugin>
            <groupId>org.apache.activemq</groupId>
            <artifactId>artemis-maven-plugin</artifactId>
            <executions>
               <execution>
                  <id>create0</id>
                  <goals>
                     <goal>create</goal>
                  </goals>
                  <configuration>
                     <!-- this makes it easier in certain envs -->
                     <javaOptions>-Djava.net.preferIPv4Stack=true</javaOptions>
                     <instance>${basedir}/target/server0</instance>
                     <configuration>${basedir}/target/classes/activemq/server0</configuration>
                     <javaOptions>-Dudp-address=${udp-address}</javaOptions>
                  </configuration>
               </execution>
               <execution>
                  <id>create1</id>
                  <goals>
                     <goal>create</goal>
                  </goals>
                  <configuration>
                     <!-- this makes it easier in certain envs -->
                     <javaOptions>-Djava.net.preferIPv4Stack=true</javaOptions>
                     <instance>${basedir}/target/server1</instance>
                     <configuration>${basedir}/target/classes/activemq/server1</configuration>
                     <javaOptions>-Dudp-address=${udp-address}</javaOptions>
                  </configuration>
               </execution>
               <execution>
                  <id>runClient</id>
                  <goals>
                     <goal>runClient</goal>
                  </goals>
                  <configuration>
                     <clientClass>org.apache.activemq.artemis.jms.example.ZookeeperSinglePairFailback</clientClass>
                     <args>
                        <param>${basedir}/target/server0</param>
                        <param>${basedir}/target/server1</param>
                     </args>
                  </configuration>
               </execution>
            </executions>
            <dependencies>
               <dependency>
                  <groupId>org.apache.activemq.examples.failover</groupId>
                  <artifactId>zookeeper-single-pair-ordered-failback</artifactId>
                  <version>2.18.0-SNAPSHOT</version>
               </dependency>
            </dependencies>
         </plugin>
         <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-clean-plugin</artifactId>
         </plugin>
      </plugins>
   </build>
</project>

@ -0,0 +1,94 @@
# Zookeeper Single Pair Failback Example

This example demonstrates two servers coupled as a primary-backup pair for high availability (HA), using the
pluggable quorum vote replication Reference Implementation based on [Apache Curator](https://curator.apache.org/), with
[Apache Zookeeper](https://zookeeper.apache.org/) as the external quorum service.

The example shows a client connection failing over from live to backup when the live broker is crashed, and
then back to the original live when it is restarted (i.e. "failback").

To run the example, simply type **mvn verify** from this directory after running a Zookeeper node at `localhost:2181`.

If no Zookeeper node is configured, you can use the commands below (see the [Official Zookeeper Docker Image Site](https://hub.docker.com/_/zookeeper)
for more details on how to configure it).

Run Zookeeper `3.6.3` with:
```
$ docker run --name artemis-zk --network host --restart always -d zookeeper:3.6.3
```
By default, the official docker image exposes `2181 2888 3888 8080` as the client, follower, election and AdminServer ports.

Verify that the Zookeeper server has started correctly by running:
```
$ docker logs --follow artemis-zk
```
It should print the Zookeeper welcome ASCII logs:
```
ZooKeeper JMX enabled by default
Using config: /conf/zoo.cfg
2021-08-05 14:29:29,431 [myid:] - INFO  [main:QuorumPeerConfig@174] - Reading configuration from: /conf/zoo.cfg
2021-08-05 14:29:29,434 [myid:] - INFO  [main:QuorumPeerConfig@451] - clientPort is not set
2021-08-05 14:29:29,434 [myid:] - INFO  [main:QuorumPeerConfig@464] - secureClientPort is not set
2021-08-05 14:29:29,434 [myid:] - INFO  [main:QuorumPeerConfig@480] - observerMasterPort is not set
2021-08-05 14:29:29,435 [myid:] - INFO  [main:QuorumPeerConfig@497] - metricsProvider.className is org.apache.zookeeper.metrics.impl.DefaultMetricsProvider
2021-08-05 14:29:29,438 [myid:] - ERROR [main:QuorumPeerConfig@722] - Invalid configuration, only one server specified (ignoring)
2021-08-05 14:29:29,441 [myid:1] - INFO  [main:DatadirCleanupManager@78] - autopurge.snapRetainCount set to 3
2021-08-05 14:29:29,441 [myid:1] - INFO  [main:DatadirCleanupManager@79] - autopurge.purgeInterval set to 0
2021-08-05 14:29:29,441 [myid:1] - INFO  [main:DatadirCleanupManager@101] - Purge task is not scheduled.
2021-08-05 14:29:29,441 [myid:1] - WARN  [main:QuorumPeerMain@138] - Either no config or no quorum defined in config, running in standalone mode
2021-08-05 14:29:29,444 [myid:1] - INFO  [main:ManagedUtil@44] - Log4j 1.2 jmx support found and enabled.
2021-08-05 14:29:29,449 [myid:1] - INFO  [main:QuorumPeerConfig@174] - Reading configuration from: /conf/zoo.cfg
2021-08-05 14:29:29,449 [myid:1] - INFO  [main:QuorumPeerConfig@451] - clientPort is not set
2021-08-05 14:29:29,449 [myid:1] - INFO  [main:QuorumPeerConfig@464] - secureClientPort is not set
2021-08-05 14:29:29,449 [myid:1] - INFO  [main:QuorumPeerConfig@480] - observerMasterPort is not set
2021-08-05 14:29:29,450 [myid:1] - INFO  [main:QuorumPeerConfig@497] - metricsProvider.className is org.apache.zookeeper.metrics.impl.DefaultMetricsProvider
2021-08-05 14:29:29,450 [myid:1] - ERROR [main:QuorumPeerConfig@722] - Invalid configuration, only one server specified (ignoring)
2021-08-05 14:29:29,451 [myid:1] - INFO  [main:ZooKeeperServerMain@122] - Starting server
2021-08-05 14:29:29,459 [myid:1] - INFO  [main:ServerMetrics@62] - ServerMetrics initialized with provider org.apache.zookeeper.metrics.impl.DefaultMetricsProvider@525f1e4e
2021-08-05 14:29:29,461 [myid:1] - INFO  [main:FileTxnSnapLog@124] - zookeeper.snapshot.trust.empty : false
2021-08-05 14:29:29,467 [myid:1] - INFO  [main:ZookeeperBanner@42] -
2021-08-05 14:29:29,467 [myid:1] - INFO  [main:ZookeeperBanner@42] -   ______                  _
2021-08-05 14:29:29,467 [myid:1] - INFO  [main:ZookeeperBanner@42] -  |___  /                 | |
2021-08-05 14:29:29,467 [myid:1] - INFO  [main:ZookeeperBanner@42] -     / /    ___     ___   | |  __   ___    ___   _ __     ___   _ __
2021-08-05 14:29:29,468 [myid:1] - INFO  [main:ZookeeperBanner@42] -    / /    / _ \   / _ \  | | / /  / _ \  / _ \ | '_ \   / _ \ | '__|
2021-08-05 14:29:29,468 [myid:1] - INFO  [main:ZookeeperBanner@42] -   / /__  | (_) | | (_) | | |   <  | __/ | __/ | |_) | |  __/ | |
2021-08-05 14:29:29,468 [myid:1] - INFO  [main:ZookeeperBanner@42] -  /_____|  \___/   \___/  |_|\_\  \___|  \___| | .__/   \___| |_|
2021-08-05 14:29:29,468 [myid:1] - INFO  [main:ZookeeperBanner@42] -                                               | |
2021-08-05 14:29:29,468 [myid:1] - INFO  [main:ZookeeperBanner@42] -                                               |_|
2021-08-05 14:29:29,468 [myid:1] - INFO  [main:ZookeeperBanner@42] -
```
Alternatively, you can run
```
$ docker run -it --rm --network host zookeeper:3.6.3 zkCli.sh -server localhost:2181
```
The Zookeeper server can be reached at localhost:2181 if this outputs something like:
```
2021-08-05 14:56:03,739 [myid:localhost:2181] - INFO [main-SendThread(localhost:2181):ClientCnxn$SendThread@1448] - Session establishment complete on server localhost/0:0:0:0:0:0:0:1:2181, session id = 0x100078b8cfc0002, negotiated timeout = 30000
```
Type
```
[zk: localhost:2181(CONNECTED) 0] quit
```
to quit the client instance.

The 2 brokers of this example are already configured to connect to a single Zookeeper node at the mentioned address, thanks to the XML configuration of their `manager`:
```xml
<manager>
   <properties>
      <property key="connect-string" value="localhost:2181"/>
      <property key="namespace" value="examples"/>
      <property key="session-ms" value="18000"/>
   </properties>
</manager>
```
**NOTE** the `namespace` parameter is used to separate this pair's information from that of other applications, if the Zookeeper node is shared.

**WARNING** As already recommended in the [High Availability section](https://activemq.apache.org/components/artemis/documentation/latest/ha.html), a production environment needs >= 3 nodes to protect against network partitions.

This example can be run with
```
$ mvn verify
```

For more information on ActiveMQ Artemis failover and HA, and clustering in general, please see the clustering section of the user manual.

@ -0,0 +1,157 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.activemq.artemis.jms.example;

import javax.jms.Connection;
import javax.jms.ConnectionFactory;
import javax.jms.JMSException;
import javax.jms.MessageConsumer;
import javax.jms.MessageProducer;
import javax.jms.Queue;
import javax.jms.Session;
import javax.jms.TextMessage;
import javax.naming.InitialContext;

import org.apache.activemq.artemis.util.ServerUtil;

/**
 * Example of a live and replicating backup pair using Zookeeper as the quorum provider.
 * <p>
 * After both servers are started, the live server is killed and the backup becomes active ("fails-over").
 * <p>
 * Later the live server is restarted and takes back its position by asking the backup to stop ("fail-back").
 */
public class ZookeeperSinglePairFailback {

   private static Process server0;

   private static Process server1;

   public static void main(final String[] args) throws Exception {
      // Step 0. Prepare the Zookeeper environment as shown in readme.md

      final int numMessages = 30;

      Connection connection = null;

      InitialContext initialContext = null;

      try {
         // Step 1. Start the primary and backup servers
         server0 = ServerUtil.startServer(args[0], ZookeeperSinglePairFailback.class.getSimpleName() + "-primary", 0, 30000);
         server1 = ServerUtil.startServer(args[1], ZookeeperSinglePairFailback.class.getSimpleName() + "-backup", 1, 10000);

         // Step 2. Get an initial context for looking up JNDI from server #1
         initialContext = new InitialContext();

         // Step 3. Look up the JMS resources from JNDI
         Queue queue = (Queue) initialContext.lookup("queue/exampleQueue");
         ConnectionFactory connectionFactory = (ConnectionFactory) initialContext.lookup("ConnectionFactory");

         // Step 4. Create a JMS Connection
         connection = connectionFactory.createConnection();

         // Step 5. Create a *non-transacted* JMS Session with client acknowledgement
         Session session = connection.createSession(false, Session.CLIENT_ACKNOWLEDGE);

         // Step 6. Start the connection to ensure delivery occurs
         connection.start();

         // Step 7. Create a JMS MessageProducer and a MessageConsumer
         MessageProducer producer = session.createProducer(queue);
         MessageConsumer consumer = session.createConsumer(queue);

         // Step 8. Send some messages to server #1, the live server
         for (int i = 0; i < numMessages; i++) {
            TextMessage message = session.createTextMessage("This is text message " + i);
            producer.send(message);
            System.out.println("Sent message: " + message.getText());
         }

         // Step 9. Receive and acknowledge a third of the sent messages
         TextMessage message0 = null;
         for (int i = 0; i < numMessages / 3; i++) {
            message0 = (TextMessage) consumer.receive(5000);
            System.out.println("Got message: " + message0.getText());
         }
         message0.acknowledge();
         System.out.println("Received and acknowledged a third of the sent messages");

         // Step 10. Receive the rest of the sent messages but *do not* acknowledge them yet
         for (int i = numMessages / 3; i < numMessages; i++) {
            message0 = (TextMessage) consumer.receive(5000);
            System.out.println("Got message: " + message0.getText());
         }
         System.out.println("Received, without acknowledging, the rest of the sent messages");

         Thread.sleep(2000);
         // Step 11. Crash server #0, the live server, and wait a little while to make sure
         // it has really crashed
         ServerUtil.killServer(server0);
         System.out.println("Killed primary");

         Thread.sleep(2000);

         // Step 12. Acknowledging the received messages will fail as failover to the backup server has occurred
         try {
            message0.acknowledge();
         } catch (JMSException e) {
            System.out.println("Got (the expected) exception while acknowledging message: " + e.getMessage());
         }

         // Step 13. Consume the 2nd third of the messages again. Note that they are not considered as redelivered.
         for (int i = numMessages / 3; i < (numMessages / 3) * 2; i++) {
            message0 = (TextMessage) consumer.receive(5000);
            System.out.printf("Got message: %s (redelivered?: %s)\n", message0.getText(), message0.getJMSRedelivered());
         }

         // Step 14. Acknowledging them on the failed-over broker works fine
         message0.acknowledge();
         System.out.println("Acknowledged 2nd third of messages");

         // Step 15. Restart the primary
         server0 = ServerUtil.startServer(args[0], ZookeeperSinglePairFailback.class.getSimpleName() + "-primary", 0, 10000);
         System.out.println("Started primary");

         // await fail-back to complete
         Thread.sleep(4000);

         // Step 16. Consume the 3rd third of the messages. Note that they are not considered as redelivered.
         for (int i = (numMessages / 3) * 2; i < numMessages; i++) {
            message0 = (TextMessage) consumer.receive(5000);
            System.out.printf("Got message: %s (redelivered?: %s)\n", message0.getText(), message0.getJMSRedelivered());
         }
         message0.acknowledge();
         System.out.println("Acknowledged 3rd third of messages");

      } finally {
         // Step 17. Be sure to close our resources!

         if (connection != null) {
            connection.close();
         }

         if (initialContext != null) {
            initialContext.close();
         }

         ServerUtil.killServer(server0);
         ServerUtil.killServer(server1);

         // Step 18. Stop the Zookeeper server started for the example (see readme.md)
      }
   }
}

@ -0,0 +1,90 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<configuration xmlns="urn:activemq" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:activemq /schema/artemis-configuration.xsd">
   <core xmlns="urn:activemq:core">

      <bindings-directory>./data/bindings</bindings-directory>

      <journal-directory>./data/journal</journal-directory>

      <large-messages-directory>./data/largemessages</large-messages-directory>

      <paging-directory>./data/paging</paging-directory>

      <cluster-user>exampleUser</cluster-user>

      <cluster-password>secret</cluster-password>

      <ha-policy>
         <replication>
            <primary>
               <manager>
                  <properties>
                     <property key="connect-string" value="localhost:2181"/>
                     <property key="namespace" value="examples"/>
                     <property key="session-ms" value="18000"/>
                  </properties>
               </manager>
            </primary>
         </replication>
      </ha-policy>

      <connectors>
         <connector name="netty-connector">tcp://localhost:61616</connector>
         <connector name="netty-backup-connector">tcp://localhost:61617</connector>
      </connectors>

      <!-- Acceptors -->
      <acceptors>
         <acceptor name="netty-acceptor">tcp://localhost:61616</acceptor>
      </acceptors>

      <cluster-connections>
         <cluster-connection name="my-cluster">
            <connector-ref>netty-connector</connector-ref>
            <static-connectors>
               <connector-ref>netty-backup-connector</connector-ref>
            </static-connectors>
         </cluster-connection>
      </cluster-connections>

      <!-- Other config -->

      <security-settings>
         <!--security for example queue-->
         <security-setting match="exampleQueue">
            <permission roles="guest" type="createDurableQueue"/>
            <permission roles="guest" type="deleteDurableQueue"/>
            <permission roles="guest" type="createNonDurableQueue"/>
            <permission roles="guest" type="deleteNonDurableQueue"/>
            <permission roles="guest" type="consume"/>
            <permission roles="guest" type="send"/>
         </security-setting>
      </security-settings>

      <addresses>
         <address name="exampleQueue">
            <anycast>
               <queue name="exampleQueue"/>
            </anycast>
         </address>
      </addresses>
   </core>
</configuration>

@@ -0,0 +1,91 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<configuration xmlns="urn:activemq" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:activemq /schema/artemis-configuration.xsd">
   <core xmlns="urn:activemq:core">

      <bindings-directory>./data/bindings</bindings-directory>

      <journal-directory>./data/journal</journal-directory>

      <large-messages-directory>./data/largemessages</large-messages-directory>

      <paging-directory>./data/paging</paging-directory>

      <cluster-user>exampleUser</cluster-user>

      <cluster-password>secret</cluster-password>

      <ha-policy>
         <replication>
            <backup>
               <manager>
                  <properties>
                     <property key="connect-string" value="localhost:2181"/>
                     <property key="namespace" value="examples"/>
                     <property key="session-ms" value="18000"/>
                  </properties>
               </manager>
               <allow-failback>true</allow-failback>
            </backup>
         </replication>
      </ha-policy>

      <!-- Connectors -->
      <connectors>
         <connector name="netty-live-connector">tcp://localhost:61616</connector>
         <connector name="netty-connector">tcp://localhost:61617</connector>
      </connectors>

      <!-- Acceptors -->
      <acceptors>
         <acceptor name="netty-acceptor">tcp://localhost:61617</acceptor>
      </acceptors>

      <cluster-connections>
         <cluster-connection name="my-cluster">
            <connector-ref>netty-connector</connector-ref>
            <static-connectors>
               <connector-ref>netty-live-connector</connector-ref>
            </static-connectors>
         </cluster-connection>
      </cluster-connections>

      <!-- Other config -->

      <security-settings>
         <!--security for example queue-->
         <security-setting match="exampleQueue">
            <permission roles="guest" type="createDurableQueue"/>
            <permission roles="guest" type="deleteDurableQueue"/>
            <permission roles="guest" type="createNonDurableQueue"/>
            <permission roles="guest" type="deleteNonDurableQueue"/>
            <permission roles="guest" type="consume"/>
            <permission roles="guest" type="send"/>
         </security-setting>
      </security-settings>

      <addresses>
         <address name="exampleQueue">
            <anycast>
               <queue name="exampleQueue"/>
            </anycast>
         </address>
      </addresses>
   </core>
</configuration>
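The <manager> block above points the backup at a coordination endpoint (the localhost:2181 connect-string looks like a ZooKeeper address) via plain key/value properties. The same policy can be assembled programmatically, as the test base classes in this commit do with the file-based manager. A minimal sketch under that assumption; the locks-folder path is illustrative, not taken from the commit:

import java.util.Collections;

import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;

public class BackupPolicySketch {

   // Rough in-code equivalent of the <ha-policy> block above; the
   // FileBasedPrimitiveManager stands in for whatever manager the
   // XML's connect-string/namespace/session-ms properties configure.
   static ReplicationBackupPolicyConfiguration backupPolicy() {
      DistributedPrimitiveManagerConfiguration managerConfiguration =
         new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(),
            Collections.singletonMap("locks-folder", "/tmp/quorum-locks"));
      ReplicationBackupPolicyConfiguration haPolicy = ReplicationBackupPolicyConfiguration.withDefault();
      haPolicy.setDistributedManagerConfiguration(managerConfiguration);
      haPolicy.setAllowFailBack(true);
      return haPolicy;
   }
}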
@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

java.naming.factory.initial=org.apache.activemq.artemis.jndi.ActiveMQInitialContextFactory
connectionFactory.ConnectionFactory=tcp://localhost:61616?ha=true&retryInterval=1000&retryIntervalMultiplier=1.0&reconnectAttempts=-1
queue.queue/exampleQueue=exampleQueue
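A client resolves these entries through plain JNDI. A minimal sketch, assuming the Artemis JMS client (JMS 2.0) and this jndi.properties are on the classpath; the class and variable names are illustrative:

import javax.jms.Connection;
import javax.jms.ConnectionFactory;
import javax.jms.Queue;
import javax.naming.InitialContext;

public class ExampleLookup {
   public static void main(String[] args) throws Exception {
      InitialContext context = new InitialContext(); // picks up jndi.properties from the classpath
      ConnectionFactory cf = (ConnectionFactory) context.lookup("ConnectionFactory");
      Queue queue = (Queue) context.lookup("queue/exampleQueue");
      try (Connection connection = cf.createConnection()) {
         // ha=true plus reconnectAttempts=-1 in the URL keeps this connection
         // retrying across the live/backup pair during failover
         connection.start();
      }
      context.close();
   }
}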
@@ -77,7 +77,6 @@ public class InfiniteRedeliveryTest extends ActiveMQTestBase {

   Configuration backupConfig;
   Configuration liveConfig;
-  NodeManager nodeManager;

   protected TestableServer createTestableServer(Configuration config, NodeManager nodeManager) throws Exception {
      boolean isBackup = config.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration || config.getHAPolicyConfiguration() instanceof SharedStoreSlavePolicyConfiguration;

@@ -99,13 +98,11 @@ public class InfiniteRedeliveryTest extends ActiveMQTestBase {

-     nodeManager = new InVMNodeManager(true, backupConfig.getJournalLocation());
-     backupServer = createTestableServer(backupConfig, nodeManager);
+     backupServer = createTestableServer(backupConfig, new InVMNodeManager(true, backupConfig.getJournalLocation()));

      liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(TransportConfigurationUtils.getNettyAcceptor(true, 0));

-     liveServer = createTestableServer(liveConfig, nodeManager);
+     liveServer = createTestableServer(liveConfig, new InVMNodeManager(false, liveConfig.getJournalLocation()));
   }

   protected void configureReplicationPair(TransportConfiguration backupConnector,
@@ -659,6 +659,8 @@ public class FailoverTest extends FailoverTestBase {

      backupServer.getServer().fail(true);

+     decrementActivationSequenceForForceRestartOf(liveServer);
+
      liveServer.start();

      consumer.close();

@@ -823,6 +825,7 @@ public class FailoverTest extends FailoverTestBase {
      Assert.assertFalse("must NOT be a backup", isBackup);
      adaptLiveConfigForReplicatedFailBack(liveServer);
      beforeRestart(liveServer);
+     decrementActivationSequenceForForceRestartOf(liveServer);
      liveServer.start();
      Assert.assertTrue("live initialized...", liveServer.getServer().waitForActivation(15, TimeUnit.SECONDS));

@@ -931,12 +934,13 @@ public class FailoverTest extends FailoverTestBase {
      while (!backupServer.isStarted() && i++ < 100) {
         Thread.sleep(100);
      }
-     liveServer.getServer().waitForActivation(5, TimeUnit.SECONDS);
+     backupServer.getServer().waitForActivation(5, TimeUnit.SECONDS);
      Assert.assertTrue(backupServer.isStarted());

      if (isReplicated) {
         FileMoveManager moveManager = new FileMoveManager(backupServer.getServer().getConfiguration().getJournalLocation(), 0);
-        Assert.assertEquals(1, moveManager.getNumberOfFolders());
+        // backup has not had a chance to restart as a backup and cleanup
+        Wait.assertTrue(() -> moveManager.getNumberOfFolders() <= 2);
      }
   } else {
      backupServer.stop();

@@ -2427,6 +2431,10 @@ public class FailoverTest extends FailoverTestBase {
      // no-op
   }

+  protected void decrementActivationSequenceForForceRestartOf(TestableServer liveServer) throws Exception {
+     // no-op
+  }
+
   protected ClientSession sendAndConsume(final ClientSessionFactory sf1, final boolean createQueue) throws Exception {
      ClientSession session = createSession(sf1, false, true, true);
@@ -46,7 +46,6 @@ import org.apache.activemq.artemis.core.remoting.impl.invm.InVMRegistry;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicatedPolicy;
-import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.InVMNodeManager;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;

@@ -88,6 +87,10 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {

   protected NodeManager nodeManager;

+  protected NodeManager backupNodeManager;
+
+  protected DistributedPrimitiveManagerConfiguration managerConfiguration;
+
   protected boolean startBackupServer = true;

   @Override

@@ -173,6 +176,10 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
      return new InVMNodeManager(false);
   }

+  protected NodeManager createNodeManager(Configuration configuration) throws Exception {
+     return new InVMNodeManager(false, configuration.getNodeManagerLockLocation());
+  }
+
   protected void createConfigs() throws Exception {
      nodeManager = createNodeManager();
      TransportConfiguration liveConnector = getConnectorTransportConfiguration(true);

@@ -211,13 +218,14 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
      backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false);

      setupHAPolicyConfiguration();
-     nodeManager = createReplicatedBackupNodeManager(backupConfig);
+     backupNodeManager = createReplicatedBackupNodeManager(backupConfig);

-     backupServer = createTestableServer(backupConfig);
+     backupServer = createTestableServer(backupConfig, backupNodeManager);

      liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true));

-     liveServer = createTestableServer(liveConfig);
+     nodeManager = createNodeManager(liveConfig);
+     liveServer = createTestableServer(liveConfig, nodeManager);

      if (supportsRetention()) {
         liveServer.getServer().getConfiguration().setJournalRetentionDirectory(getJournalDir(0, false) + "_retention");

@@ -233,7 +241,7 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
      backupConfig = createDefaultInVMConfig();
      liveConfig = createDefaultInVMConfig();

-     DistributedPrimitiveManagerConfiguration managerConfiguration =
+     managerConfiguration =
         new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(),
            Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString()));

@@ -242,13 +250,14 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
      backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false);

      setupHAPolicyConfiguration();
-     nodeManager = createReplicatedBackupNodeManager(backupConfig);
+     backupNodeManager = createReplicatedBackupNodeManager(backupConfig);

-     backupServer = createTestableServer(backupConfig);
+     backupServer = createTestableServer(backupConfig, backupNodeManager);

      liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true));

-     liveServer = createTestableServer(liveConfig);
+     nodeManager = createNodeManager(liveConfig);
+     liveServer = createTestableServer(liveConfig, nodeManager);
   }

   protected void setupHAPolicyConfiguration() {

@@ -272,8 +281,6 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
      HAPolicy policy = server.getServer().getHAPolicy();
      if (policy instanceof ReplicatedPolicy) {
         ((ReplicatedPolicy) policy).setCheckForLiveServer(true);
-     } else if (policy instanceof ReplicationPrimaryPolicy) {
-        Assert.assertTrue("Adapting won't work for the current configuration", ((ReplicationPrimaryPolicy) policy).isCheckForLiveServer());
      }

   }

@@ -294,6 +301,7 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {

      nodeManager = null;

+     backupNodeManager = null;
      try {
         ServerSocket serverSocket = new ServerSocket(61616);
         serverSocket.close();
@@ -182,8 +182,6 @@ public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
            break;
         case PluggableQuorumReplication:
            haPolicyConfiguration = ReplicationBackupPolicyConfiguration.withDefault()
-              .setVoteRetries(1)
-              .setVoteRetryWait(1000)
               .setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration())
               .setGroupName(getNodeGroupName() != null ? (getNodeGroupName() + "-" + i) : null);
            break;

@@ -278,12 +276,14 @@ public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
      return addClientSession(sf.createSession(xa, autoCommitSends, autoCommitAcks));
   }

-  protected void waitForDistribution(SimpleString address, ActiveMQServer server, int messageCount) throws Exception {
+  protected boolean waitForDistribution(SimpleString address, ActiveMQServer server, int messageCount) throws Exception {
      ActiveMQServerLogger.LOGGER.debug("waiting for distribution of messages on server " + server);

      Queue q = (Queue) server.getPostOffice().getBinding(address).getBindable();

-     Wait.waitFor(() -> getMessageCount(q) >= messageCount);
+     return Wait.waitFor(() -> {
+        return getMessageCount(q) >= messageCount;
+     });
   }
}
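Returning Wait.waitFor's result lets callers turn a silent timeout into an immediate test failure; the call sites updated further down wrap it in an assertion, along these lines:

// Pattern used by the updated call sites: fail the test if the expected
// message count never arrives, instead of timing out silently.
assertTrue(waitForDistribution(ADDRESS, backupServers.get(0).getServer(), 100));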
@@ -156,12 +156,13 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
      liveConfig.setBindingsDirectory(getBindingsDir(0, false)).setJournalDirectory(getJournalDir(0, false)).
         setPagingDirectory(getPageDir(0, false)).setLargeMessagesDirectory(getLargeMessagesDir(0, false)).setSecurityEnabled(false);

-     NodeManager nodeManager = createReplicatedBackupNodeManager(backupConfig);
+     NodeManager replicatedBackupNodeManager = createReplicatedBackupNodeManager(backupConfig);

-     backupServer = createTestableServer(backupConfig, nodeManager);
+     backupServer = createTestableServer(backupConfig, replicatedBackupNodeManager);

      liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true));

+     NodeManager nodeManager = createReplicatedBackupNodeManager(liveConfig);
      liveServer = createTestableServer(liveConfig, nodeManager);

      final TestableServer theBackup = backupServer;
@@ -75,7 +75,10 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated

      sendCrashReceive();
      Wait.assertTrue(backupServers.get(0)::isActive, 5000, 10);
+     Wait.assertTrue(backupServers.get(1)::isActive, 5000, 10);
      waitForTopology(backupServers.get(0).getServer(), liveServers.size(), 2);
+     waitForTopology(backupServers.get(1).getServer(), liveServers.size(), 2);

      sendCrashBackupReceive();
   }

@@ -115,6 +118,14 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated
   }

   protected void sendCrashBackupReceive() throws Exception {

+     //make sure bindings are ready before sending messages b/c we verify strict load balancing in waitForDistribution
+     this.waitForBindings(backupServers.get(0).getServer(), ADDRESS.toString(), false, 1, 0, 2000);
+
+     this.waitForBindings(backupServers.get(1).getServer(), ADDRESS.toString(), false, 1, 0, 2000);
+
      ServerLocator locator0 = getBackupServerLocator(0);
      ServerLocator locator1 = getBackupServerLocator(1);

@@ -138,8 +149,8 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated

      producer.close();

-     waitForDistribution(ADDRESS, backupServers.get(0).getServer(), 100);
-     waitForDistribution(ADDRESS, backupServers.get(1).getServer(), 100);
+     assertTrue(waitForDistribution(ADDRESS, backupServers.get(0).getServer(), 100));
+     assertTrue(waitForDistribution(ADDRESS, backupServers.get(1).getServer(), 100));

      List<TestableServer> toCrash = new ArrayList<>();
      for (TestableServer backupServer : backupServers) {
@@ -24,7 +24,6 @@ import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.api.core.Interceptor;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
-import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.protocol.core.Packet;
import org.apache.activemq.artemis.core.protocol.core.impl.PacketImpl;
import org.apache.activemq.artemis.spi.core.protocol.RemotingConnection;

@@ -90,7 +89,6 @@ public class PluggableQuorumBackupAuthenticationTest extends FailoverTestBase {

   @Override
   protected void setupHAPolicyConfiguration() {
-     ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
      ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
   }
@@ -17,8 +17,7 @@
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;

import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
-import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.tests.integration.cluster.failover.BackupSyncJournalTest;

public class PluggableQuorumBackupSyncJournalTest extends BackupSyncJournalTest {

@@ -30,8 +29,6 @@ public class PluggableQuorumBackupSyncJournalTest extends BackupSyncJournalTest

   @Override
   protected void setupHAPolicyConfiguration() {
-     ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration())
-        .setCheckForLiveServer(true);
      ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration())
         .setMaxSavedReplicatedJournalsSize(2)
         .setAllowFailBack(true);
@@ -29,18 +29,24 @@ import org.apache.activemq.artemis.api.core.QueueConfiguration;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.api.core.client.ClientSession;
import org.apache.activemq.artemis.component.WebServerComponent;
+import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
-import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
+import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.core.server.ServiceComponent;
import org.apache.activemq.artemis.dto.AppDTO;
import org.apache.activemq.artemis.dto.WebServerDTO;
+import org.apache.activemq.artemis.quorum.MutableLong;
+import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
import org.apache.activemq.artemis.tests.integration.cluster.failover.FailoverTest;
+import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
import org.apache.activemq.artemis.tests.util.Wait;
+import org.jboss.logging.Logger;
import org.junit.Assert;
import org.junit.Test;

public class PluggableQuorumNettyNoGroupNameReplicatedFailoverTest extends FailoverTest {

+  private static final Logger log = Logger.getLogger(PluggableQuorumNettyNoGroupNameReplicatedFailoverTest.class);
+
   protected void beforeWaitForRemoteBackupSynchronization() {
   }

@@ -170,8 +176,6 @@ public class PluggableQuorumNettyNoGroupNameReplicatedFailoverTest extends Failo

   @Override
   protected void setupHAPolicyConfiguration() {
-     ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration())
-        .setCheckForLiveServer(true);
      ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration())
         .setMaxSavedReplicatedJournalsSize(2)
         .setAllowFailBack(true);

@@ -210,4 +214,30 @@ public class PluggableQuorumNettyNoGroupNameReplicatedFailoverTest extends Failo
      }
      super.crash(sessions);
   }

+  @Override
+  protected void decrementActivationSequenceForForceRestartOf(TestableServer testableServer) throws Exception {
+     doDecrementActivationSequenceForForceRestartOf(log, nodeManager, managerConfiguration);
+  }
+
+  public static void doDecrementActivationSequenceForForceRestartOf(Logger log, NodeManager nodeManager, DistributedPrimitiveManagerConfiguration distributedPrimitiveManagerConfiguration) throws Exception {
+     nodeManager.start();
+     long localActivation = nodeManager.readNodeActivationSequence();
+     // file based
+     FileBasedPrimitiveManager fileBasedPrimitiveManager = new FileBasedPrimitiveManager(distributedPrimitiveManagerConfiguration.getProperties());
+     fileBasedPrimitiveManager.start();
+     try {
+        MutableLong mutableLong = fileBasedPrimitiveManager.getMutableLong(nodeManager.getNodeId().toString());
+
+        if (!mutableLong.compareAndSet(localActivation + 1, localActivation)) {
+           throw new Exception("Failed to decrement coordinated activation sequence to: " + localActivation + ", not +1 : " + mutableLong.get());
+        }
+        log.warn("Intentionally decrementing coordinated activation sequence for test, may result in lost data");
+     } finally {
+        fileBasedPrimitiveManager.stop();
+        nodeManager.stop();
+     }
+  }
+
}
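The helper above rolls the coordinated activation sequence back by exactly one: the compare-and-set only succeeds when the coordinated value is the locally persisted sequence plus one, so nothing else can be clobbered. A minimal companion sketch under the same assumptions (file-based manager; the caller starts and stops the node manager) that checks the precondition without mutating anything; the method name is illustrative:

// Sketch: verify the coordinated activation sequence sits exactly one ahead of
// the locally persisted one before attempting a rollback like the helper above.
static void assertCoordinatedIsLocalPlusOne(NodeManager nodeManager,
                                            DistributedPrimitiveManagerConfiguration configuration) throws Exception {
   FileBasedPrimitiveManager manager = new FileBasedPrimitiveManager(configuration.getProperties());
   manager.start();
   try {
      MutableLong coordinated = manager.getMutableLong(nodeManager.getNodeId().toString());
      long local = nodeManager.readNodeActivationSequence();
      if (coordinated.get() != local + 1) {
         throw new IllegalStateException("coordinated=" + coordinated.get() + ", expected local+1 with local=" + local);
      }
   } finally {
      manager.stop();
   }
}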
@@ -17,7 +17,6 @@
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;

import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
-import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.tests.integration.cluster.failover.NettyReplicationStopTest;

public class PluggableQuorumNettyReplicationStopTest extends NettyReplicationStopTest {

@@ -29,7 +28,6 @@ public class PluggableQuorumNettyReplicationStopTest extends NettyReplicationSto

   @Override
   protected void setupHAPolicyConfiguration() {
-     ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
      ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
   }
@@ -17,7 +17,6 @@
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;

import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
-import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.tests.integration.cluster.failover.PageCleanupWhileReplicaCatchupTest;

public class PluggableQuorumPageCleanupWhileReplicaCatchupTest extends PageCleanupWhileReplicaCatchupTest {

@@ -29,7 +28,6 @@ public class PluggableQuorumPageCleanupWhileReplicaCatchupTest extends PageClean

   @Override
   protected void setupHAPolicyConfiguration() {
-     ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
      ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
   }
@@ -48,7 +48,6 @@ public class PluggableQuorumReplicaTimeoutTest extends ReplicaTimeoutTest {
         managerConfiguration, managerConfiguration);
      ReplicationPrimaryPolicyConfiguration primaryConfiguration = ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration());
      primaryConfiguration.setInitialReplicationSyncTimeout(1000);
-     primaryConfiguration.setCheckForLiveServer(true);
      ReplicationBackupPolicyConfiguration backupConfiguration = ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration());
      backupConfiguration.setInitialReplicationSyncTimeout(1000);
      backupConfiguration.setMaxSavedReplicatedJournalsSize(2)
@@ -18,11 +18,15 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;

import org.apache.activemq.artemis.api.core.client.ClientSession;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
-import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.tests.integration.cluster.failover.LargeMessageFailoverTest;
+import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
+import org.jboss.logging.Logger;
+
+import static org.apache.activemq.artemis.tests.integration.cluster.failover.quorum.PluggableQuorumNettyNoGroupNameReplicatedFailoverTest.doDecrementActivationSequenceForForceRestartOf;

public class PluggableQuorumReplicatedLargeMessageFailoverTest extends LargeMessageFailoverTest {

+  private static final Logger log = Logger.getLogger(PluggableQuorumReplicatedLargeMessageFailoverTest.class);
+
   @Override
   protected void createConfigs() throws Exception {
      createPluggableReplicatedConfigs();

@@ -30,7 +34,6 @@ public class PluggableQuorumReplicatedLargeMessageFailoverTest extends LargeMess

   @Override
   protected void setupHAPolicyConfiguration() {
-     ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
      ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
   }

@@ -58,4 +61,8 @@ public class PluggableQuorumReplicatedLargeMessageFailoverTest extends LargeMess
      super.crash(sessions);
   }

+  @Override
+  protected void decrementActivationSequenceForForceRestartOf(TestableServer liveServer) throws Exception {
+     doDecrementActivationSequenceForForceRestartOf(log, nodeManager, managerConfiguration);
+  }
}
@@ -18,7 +18,6 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;

import org.apache.activemq.artemis.api.core.client.ClientSession;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
-import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.tests.integration.cluster.util.BackupSyncDelay;
import org.junit.After;
import org.junit.Before;

@@ -66,7 +65,6 @@ public class PluggableQuorumReplicatedLargeMessageWithDelayFailoverTest extends

   @Override
   protected void setupHAPolicyConfiguration() {
-     ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
      ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration())
         .setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
   }
@@ -17,7 +17,6 @@
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;

import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
-import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.tests.integration.cluster.failover.PagingFailoverTest;

public class PluggableQuorumReplicatedPagingFailoverTest extends PagingFailoverTest {

@@ -29,7 +28,6 @@ public class PluggableQuorumReplicatedPagingFailoverTest extends PagingFailoverT

   @Override
   protected void setupHAPolicyConfiguration() {
-     ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
      ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
   }
}
@@ -520,6 +520,11 @@ public class ActiveMQServerControlUsingCoreTest extends ActiveMQServerControlTes
      return (String) proxy.retrieveAttributeValue("nodeID");
   }

+  @Override
+  public long getActivationSequence() {
+     return (Long) proxy.retrieveAttributeValue("activationSequence");
+  }
+
   @Override
   public String getManagementAddress() {
      return (String) proxy.retrieveAttributeValue("managementAddress");
@@ -45,8 +45,6 @@ public class PluggableQuorumReplicationFlowControlTest extends SharedNothingRepl
      ReplicationBackupPolicyConfiguration haPolicy = ReplicationBackupPolicyConfiguration.withDefault();
      haPolicy.setDistributedManagerConfiguration(managerConfiguration);
      haPolicy.setClusterName("cluster");
-     // fail-fast in order to let the backup to quickly retry syncing with primary
-     haPolicy.setVoteRetries(0);
      return haPolicy;
   }

@@ -54,7 +52,6 @@ public class PluggableQuorumReplicationFlowControlTest extends SharedNothingRepl
   protected HAPolicyConfiguration createReplicationLiveConfiguration() {
      ReplicationPrimaryPolicyConfiguration haPolicy = ReplicationPrimaryPolicyConfiguration.withDefault();
      haPolicy.setDistributedManagerConfiguration(managerConfiguration);
-     haPolicy.setCheckForLiveServer(false);
      return haPolicy;
   }
}
@@ -18,7 +18,6 @@ package org.apache.activemq.artemis.tests.integration.replication;

import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
-import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

@@ -30,7 +29,7 @@ public class PluggableQuorumReplicationOrderTest extends ReplicationOrderTest {
   @Parameterized.Parameter
   public boolean useNetty;

-  @Parameterized.Parameters(name = "useNetty={1}")
+  @Parameterized.Parameters(name = "useNetty={0}")
   public static Iterable<Object[]> getParams() {
      return asList(new Object[][]{{false}, {true}});
   }

@@ -42,8 +41,6 @@ public class PluggableQuorumReplicationOrderTest extends ReplicationOrderTest {

   @Override
   protected void setupHAPolicyConfiguration() {
-     ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration())
-        .setCheckForLiveServer(true);
      ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration())
         .setMaxSavedReplicatedJournalsSize(2)
         .setAllowFailBack(true);
@@ -18,14 +18,37 @@ package org.apache.activemq.artemis.tests.integration.replication;

import java.io.IOException;
import java.util.Collections;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;

+import org.apache.activemq.artemis.api.core.Message;
+import org.apache.activemq.artemis.api.core.QueueConfiguration;
+import org.apache.activemq.artemis.api.core.RoutingType;
+import org.apache.activemq.artemis.api.core.client.ClientConsumer;
+import org.apache.activemq.artemis.api.core.client.ClientMessage;
+import org.apache.activemq.artemis.api.core.client.ClientProducer;
+import org.apache.activemq.artemis.api.core.client.ClientSession;
+import org.apache.activemq.artemis.api.core.client.ClientSessionFactory;
+import org.apache.activemq.artemis.api.core.client.ServerLocator;
+import org.apache.activemq.artemis.core.client.impl.ServerLocatorImpl;
+import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
+import org.apache.activemq.artemis.core.server.ActivateCallback;
+import org.apache.activemq.artemis.core.server.ActiveMQServer;
+import org.apache.activemq.artemis.core.server.ActiveMQServers;
+import org.apache.activemq.artemis.core.server.impl.FileLockNodeManager;
+import org.apache.activemq.artemis.quorum.DistributedLock;
+import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
+import org.apache.activemq.artemis.quorum.MutableLong;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
+import org.apache.activemq.artemis.tests.util.Wait;
+import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
+import org.junit.Test;
import org.junit.rules.TemporaryFolder;

public class PluggableQuorumReplicationTest extends SharedNothingReplicationTest {

@@ -43,7 +66,6 @@ public class PluggableQuorumReplicationTest extends SharedNothingReplicationTest
   protected HAPolicyConfiguration createReplicationLiveConfiguration() {
      ReplicationPrimaryPolicyConfiguration haPolicy = ReplicationPrimaryPolicyConfiguration.withDefault();
      haPolicy.setDistributedManagerConfiguration(managerConfiguration);
-     haPolicy.setCheckForLiveServer(false);
      return haPolicy;
   }
@@ -55,4 +77,616 @@ public class PluggableQuorumReplicationTest extends SharedNothingReplicationTest
      return haPolicy;
   }

+  @Test
+  public void testUnReplicatedOrderedTransition() throws Exception {
+     // start live
+     final Configuration liveConfiguration = createLiveConfiguration();
+
+     ActiveMQServer liveServer = addServer(ActiveMQServers.newActiveMQServer(liveConfiguration));
+     liveServer.setIdentity("LIVE");
+     liveServer.start();
+
+     Wait.waitFor(liveServer::isStarted);
+
+     ServerLocator locator = ServerLocatorImpl.newLocator("(tcp://localhost:61616,tcp://localhost:61617)?ha=true");
+     locator.setCallTimeout(60_000L);
+     locator.setConnectionTTL(60_000L);
+
+     ClientSessionFactory csf = locator.createSessionFactory();
+     ClientSession clientSession = csf.createSession();
+     clientSession.createQueue(new QueueConfiguration("slow").setRoutingType(RoutingType.ANYCAST));
+     clientSession.close();
+
+     // start backup
+     Configuration backupConfiguration = createBackupConfiguration();
+     ActiveMQServer backupServer = addServer(ActiveMQServers.newActiveMQServer(backupConfiguration));
+     backupServer.setIdentity("BACKUP");
+     backupServer.start();
+
+     Wait.waitFor(backupServer::isStarted);
+
+     waitForTopology(liveServer, 1, 1, 30000);
+     waitForTopology(backupServer, 1, 1, 30000);
+
+     liveServer.stop();
+
+     // backup will take over and run unreplicated
+
+     csf = locator.createSessionFactory();
+     clientSession = csf.createSession();
+     clientSession.createQueue(new QueueConfiguration("slow_un_replicated").setRoutingType(RoutingType.ANYCAST));
+     clientSession.close();
+
+     waitForTopology(backupServer, 1, 0, 30000);
+     assertTrue(Wait.waitFor(() -> 2L == backupServer.getNodeManager().getNodeActivationSequence()));
+
+     backupServer.stop(false);
+
+     // now only the backup should be able to start, as it has run unreplicated
+     liveServer.start();
+     Wait.assertFalse(liveServer::isActive);
+     liveServer.stop();
+
+     // restart backup
+     backupServer.start();
+
+     Wait.waitFor(backupServer::isStarted);
+     assertEquals(3L, backupServer.getNodeManager().getNodeActivationSequence());
+
+     csf = locator.createSessionFactory();
+     clientSession = csf.createSession();
+     clientSession.createQueue(new QueueConfiguration("backup_as_un_replicated").setRoutingType(RoutingType.ANYCAST));
+     clientSession.close();
+
+     // verify that the live restarts as a backup to the restarted backupServer that has taken on the live role, no failback
+     liveServer.start();
+
+     csf = locator.createSessionFactory();
+     clientSession = csf.createSession();
+     clientSession.createQueue(new QueueConfiguration("backup_as_replicated").setRoutingType(RoutingType.ANYCAST));
+     clientSession.close();
+
+     assertTrue(Wait.waitFor(liveServer::isReplicaSync));
+     assertTrue(Wait.waitFor(() -> 3L == liveServer.getNodeManager().getNodeActivationSequence()));
+
+     backupServer.stop(true);
+
+     waitForTopology(liveServer, 1, 0, 30000);
+     assertTrue(Wait.waitFor(() -> 4L == liveServer.getNodeManager().getNodeActivationSequence()));
+
+     liveServer.stop(true);
+     clientSession.close();
+     locator.close();
+  }
+  @Test
+  public void testBackupFailoverAndPrimaryFailback() throws Exception {
+     final int timeout = (int) TimeUnit.SECONDS.toMillis(30);
+
+     // start live
+     Configuration liveConfiguration = createLiveConfiguration();
+
+     ActiveMQServer primaryInstance = addServer(ActiveMQServers.newActiveMQServer(liveConfiguration));
+     primaryInstance.setIdentity("PRIMARY");
+     primaryInstance.start();
+
+     // primary initially UN REPLICATED
+     Assert.assertEquals(1L, primaryInstance.getNodeManager().getNodeActivationSequence());
+
+     // start backup
+     Configuration backupConfiguration = createBackupConfiguration();
+     ((ReplicationBackupPolicyConfiguration) backupConfiguration.getHAPolicyConfiguration()).setAllowFailBack(true);
+
+     ActiveMQServer backupServer = addServer(ActiveMQServers.newActiveMQServer(backupConfiguration));
+     backupServer.setIdentity("BACKUP");
+     backupServer.start();
+
+     Wait.waitFor(backupServer::isStarted);
+
+     org.apache.activemq.artemis.utils.Wait.assertTrue(() -> backupServer.isReplicaSync(), timeout);
+
+     // primary REPLICATED, backup matches (has replicated) activation sequence
+     Assert.assertEquals(1L, primaryInstance.getNodeManager().getNodeActivationSequence());
+     Assert.assertEquals(1L, backupServer.getNodeManager().getNodeActivationSequence());
+
+     primaryInstance.stop();
+
+     // backup UN REPLICATED (new version)
+     org.apache.activemq.artemis.utils.Wait.assertTrue(() -> 2L == backupServer.getNodeManager().getNodeActivationSequence(), timeout);
+
+     // just to let the console logging breathe!
+     TimeUnit.MILLISECONDS.sleep(100);
+
+     // restart the primary, which will request failback
+     ActiveMQServer restartedPrimaryForFailBack = primaryInstance; //addServer(ActiveMQServers.newActiveMQServer(liveConfiguration));
+     restartedPrimaryForFailBack.start();
+
+     // first step is the backup getting replicated
+     org.apache.activemq.artemis.utils.Wait.assertTrue(() -> backupServer.isReplicaSync(), timeout);
+
+     // the restarted primary will run unreplicated (incrementing the sequence) while the backup restarts to revert to the backup role
+     org.apache.activemq.artemis.utils.Wait.assertTrue(() -> {
+        try {
+           return 3L == restartedPrimaryForFailBack.getNodeManager().getNodeActivationSequence();
+        } catch (NullPointerException ok) {
+           return false;
+        }
+     }, timeout);
+
+     // the backup should then resume with an in-sync replica view of that version
+     org.apache.activemq.artemis.utils.Wait.assertTrue(() -> restartedPrimaryForFailBack.isReplicaSync(), timeout);
+     org.apache.activemq.artemis.utils.Wait.assertTrue(() -> backupServer.isReplicaSync(), timeout);
+     org.apache.activemq.artemis.utils.Wait.assertTrue(() -> 3L == backupServer.getNodeManager().getNodeActivationSequence(), timeout);
+
+     // just to let the console logging breathe!
+     TimeUnit.MILLISECONDS.sleep(100);
+
+     // stop the backup to verify the primary carries on with a new sequence, unreplicated
+     backupServer.stop();
+
+     // just to let the console logging breathe!
+     TimeUnit.MILLISECONDS.sleep(100);
+
+     // live goes unreplicated
+     org.apache.activemq.artemis.utils.Wait.assertTrue(() -> {
+        try {
+           return 4L == restartedPrimaryForFailBack.getNodeManager().getNodeActivationSequence();
+        } catch (NullPointerException ok) {
+           return false;
+        }
+     }, timeout);
+
+     restartedPrimaryForFailBack.stop();
+  }
+  @Test
+  public void testPrimaryIncrementActivationSequenceOnUnReplicated() throws Exception {
+     final int timeout = (int) TimeUnit.SECONDS.toMillis(30);
+
+     // start live
+     Configuration liveConfiguration = createLiveConfiguration();
+
+     ActiveMQServer primaryInstance = addServer(ActiveMQServers.newActiveMQServer(liveConfiguration));
+     primaryInstance.setIdentity("PRIMARY");
+     primaryInstance.start();
+
+     // primary UN REPLICATED
+     Assert.assertEquals(1L, primaryInstance.getNodeManager().getNodeActivationSequence());
+
+     // start backup
+     Configuration backupConfiguration = createBackupConfiguration();
+
+     ActiveMQServer backupServer = addServer(ActiveMQServers.newActiveMQServer(backupConfiguration));
+     backupServer.setIdentity("BACKUP");
+     backupServer.start();
+
+     Wait.waitFor(backupServer::isStarted);
+
+     org.apache.activemq.artemis.utils.Wait.assertTrue(() -> backupServer.isReplicaSync(), timeout);
+
+     // primary REPLICATED, backup matches (has replicated) activation sequence
+     Assert.assertEquals(1L, primaryInstance.getNodeManager().getNodeActivationSequence());
+     Assert.assertEquals(1L, backupServer.getNodeManager().getNodeActivationSequence());
+
+     // transition to unreplicated once the backup goes away
+     backupServer.stop();
+
+     org.apache.activemq.artemis.utils.Wait.assertTrue(() -> 2L == primaryInstance.getNodeManager().getNodeActivationSequence(), timeout);
+
+     // done
+     primaryInstance.stop();
+  }
+  @Test
+  public void testBackupStartsFirst() throws Exception {
+     // start backup
+     Configuration backupConfiguration = createBackupConfiguration();
+     ActiveMQServer backupServer = addServer(ActiveMQServers.newActiveMQServer(backupConfiguration));
+     backupServer.setIdentity("BACKUP");
+     backupServer.start();
+
+     // start live
+     final Configuration liveConfiguration = createLiveConfiguration();
+
+     ActiveMQServer liveServer = addServer(ActiveMQServers.newActiveMQServer(liveConfiguration));
+     liveServer.setIdentity("LIVE");
+     liveServer.start();
+
+     Wait.waitFor(liveServer::isStarted);
+
+     assertTrue(Wait.waitFor(backupServer::isStarted));
+     assertTrue(Wait.waitFor(backupServer::isReplicaSync));
+     assertTrue(liveServer.isReplicaSync());
+  }
|
   @Test
   public void testBackupOutOfSequenceReleasesLock() throws Exception {

      // start backup
      Configuration backupConfiguration = createBackupConfiguration();
      ActiveMQServer backupServer = addServer(ActiveMQServers.newActiveMQServer(backupConfiguration));
      backupServer.setIdentity("BACKUP");
      backupServer.start();

      // start live
      final Configuration liveConfiguration = createLiveConfiguration();

      ActiveMQServer liveServer = addServer(ActiveMQServers.newActiveMQServer(liveConfiguration));
      liveServer.setIdentity("LIVE");
      liveServer.start();

      Wait.waitFor(liveServer::isStarted);

      assertTrue(Wait.waitFor(backupServer::isStarted));
      assertTrue(Wait.waitFor(backupServer::isReplicaSync));
      assertTrue(liveServer.isReplicaSync());

      backupServer.stop();

      TimeUnit.SECONDS.sleep(1);

      liveServer.stop();
      // the backup can get the lock but does not have the sequence to start; it will try and be a backup

      backupServer.start();

      // the live server should be active
      liveServer.start();
      Wait.waitFor(liveServer::isStarted);

      assertTrue(Wait.waitFor(backupServer::isStarted));
      assertTrue(Wait.waitFor(backupServer::isReplicaSync));
      assertTrue(liveServer.isReplicaSync());
   }

   @Test
   public void testBackupOutOfSequenceCheckActivationSequence() throws Exception {

      // start backup
      Configuration backupConfiguration = createBackupConfiguration();
      ActiveMQServer backupServer = addServer(ActiveMQServers.newActiveMQServer(backupConfiguration));
      backupServer.setIdentity("BACKUP");
      backupServer.start();

      // start live
      final Configuration liveConfiguration = createLiveConfiguration();

      ActiveMQServer liveServer = addServer(ActiveMQServers.newActiveMQServer(liveConfiguration));
      liveServer.setIdentity("LIVE");
      liveServer.start();

      Wait.waitFor(liveServer::isStarted);

      assertTrue(Wait.waitFor(backupServer::isStarted));
      assertTrue(Wait.waitFor(backupServer::isReplicaSync));
      assertTrue(liveServer.isReplicaSync());

      backupServer.stop();

      TimeUnit.SECONDS.sleep(1);

      final String coordinatedId = liveServer.getNodeID().toString();
      liveServer.stop();

      // the backup can get the lock but does not have the sequence to start; it will try and be a backup.
      // To verify that it can short-circuit with a dirty read, we grab the lock for a little while.
      DistributedPrimitiveManager distributedPrimitiveManager = DistributedPrimitiveManager.newInstanceOf(
         managerConfiguration.getClassName(),
         managerConfiguration.getProperties());
      distributedPrimitiveManager.start();
      final DistributedLock lock = distributedPrimitiveManager.getDistributedLock(coordinatedId);
      assertTrue(lock.tryLock());
      CountDownLatch preActivate = new CountDownLatch(1);
      backupServer.registerActivateCallback(new ActivateCallback() {
         @Override
         public void preActivate() {
            ActivateCallback.super.preActivate();
            preActivate.countDown();
         }
      });
      backupServer.start();

      // it should be able to do a dirty read of the sequence id and not have to wait to get a lock
      assertTrue(preActivate.await(1, TimeUnit.SECONDS));

      // release the lock
      distributedPrimitiveManager.stop();

      // the live server should be active
      liveServer.start();
      Wait.waitFor(liveServer::isStarted);

      assertTrue(Wait.waitFor(backupServer::isReplicaSync));
      assertTrue(liveServer.isReplicaSync());
   }

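The short-circuit checked above can be pictured as a simple gate: a candidate dirty-reads the coordinated sequence without waiting for the lock and refuses to compete if its local journal is behind. A minimal sketch, with assumed names that mirror the test's intent rather than the Artemis code:

final class DirtyReadGate {
   private final long localSequence;

   DirtyReadGate(long localSequence) {
      this.localSequence = localSequence;
   }

   // coordinatedSequence is read without holding the distributed lock
   boolean canTryActivate(long coordinatedSequence) {
      // behind the coordinated view -> become a backup, skip the lock wait
      return localSequence >= coordinatedSequence;
   }

   public static void main(String[] args) {
      DirtyReadGate staleBackup = new DirtyReadGate(1);
      System.out.println(staleBackup.canTryActivate(2)); // false: short-circuits to backup
   }
}
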
   @Test
   public void testSelfRepairPrimary() throws Exception {
      // start live
      final Configuration liveConfiguration = createLiveConfiguration();
      ActiveMQServer liveServer = addServer(ActiveMQServers.newActiveMQServer(liveConfiguration));
      liveServer.setIdentity("LIVE");

      liveServer.start();
      final String coordinatedId = liveServer.getNodeID().toString();
      Wait.waitFor(liveServer::isStarted);
      liveServer.stop();

      liveServer.start();
      Wait.waitFor(liveServer::isStarted);
      Assert.assertEquals(2, liveServer.getNodeManager().getNodeActivationSequence());
      liveServer.stop();

      // simulate an uncommitted claim: while holding the lock, flip the coordinated
      // activation sequence to a negative marker value
      DistributedPrimitiveManager distributedPrimitiveManager = DistributedPrimitiveManager
         .newInstanceOf(managerConfiguration.getClassName(), managerConfiguration.getProperties());
      distributedPrimitiveManager.start();
      try (DistributedLock lock = distributedPrimitiveManager.getDistributedLock(coordinatedId)) {
         assertTrue(lock.tryLock());
         distributedPrimitiveManager.getMutableLong(coordinatedId).compareAndSet(2, -2);
      }
      liveServer.start();
      Wait.waitFor(liveServer::isStarted);
      Assert.assertEquals(2, liveServer.getNodeManager().getNodeActivationSequence());
      Assert.assertEquals(2, distributedPrimitiveManager.getMutableLong(coordinatedId).get());

      distributedPrimitiveManager.stop();

      Configuration backupConfiguration = createBackupConfiguration();
      ActiveMQServer backupServer = addServer(ActiveMQServers.newActiveMQServer(backupConfiguration));
      backupServer.setIdentity("BACKUP");
      backupServer.start();
      Wait.waitFor(backupServer::isReplicaSync);
      Assert.assertEquals(2, backupServer.getNodeManager().getNodeActivationSequence());
      backupServer.stop();
   }

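The self-repair step can be compressed into one rule. The semantics below are inferred from this test (compareAndSet(2, -2) followed by the sequence healing back to 2), not taken from the Artemis code: a negative coordinated value marks an uncommitted claim of abs(value), and only the node whose local sequence already equals that claim may commit it.

final class ClaimRepairModel {
   static long repair(long coordinated, long local) {
      if (coordinated < 0 && -coordinated == local) {
         return local; // commit the outstanding claim, e.g. -2 -> 2
      }
      return coordinated;
   }

   public static void main(String[] args) {
      System.out.println(repair(-2, 2)); // 2: the test's compareAndSet(2, -2) is healed
      System.out.println(repair(-2, 1)); // -2: a stale node must not repair it
   }
}
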
   @Test
   public void testPrimaryPeers() throws Exception {
      final String PEER_NODE_ID = "some-shared-id-001";

      final Configuration liveConfiguration = createLiveConfiguration();
      ((ReplicationPrimaryPolicyConfiguration) liveConfiguration.getHAPolicyConfiguration()).setCoordinationId(PEER_NODE_ID);

      ActiveMQServer liveServer = addServer(ActiveMQServers.newActiveMQServer(liveConfiguration));
      liveServer.setIdentity("LIVE");
      liveServer.start();

      Wait.waitFor(liveServer::isStarted);

      ServerLocator locator = ServerLocatorImpl.newLocator("(tcp://localhost:61616,tcp://localhost:61617)?ha=true");
      locator.setCallTimeout(60_000L);
      locator.setConnectionTTL(60_000L);
      final ClientSessionFactory keepLocatorAliveSLF = locator.createSessionFactory();

      ClientSessionFactory csf = locator.createSessionFactory();
      sendTo(csf, "live_un_replicated");
      csf.close();

      // start the peer; it will become a backup
      Configuration peerLiveConfiguration = createBackupConfiguration(); // to get acceptor and locator ports that won't clash
      peerLiveConfiguration.setHAPolicyConfiguration(createReplicationLiveConfiguration());
      ((ReplicationPrimaryPolicyConfiguration) peerLiveConfiguration.getHAPolicyConfiguration()).setCoordinationId(PEER_NODE_ID);
      peerLiveConfiguration.setName("localhost::live-peer");

      ActiveMQServer livePeerServer = addServer(ActiveMQServers.newActiveMQServer(peerLiveConfiguration));
      livePeerServer.setIdentity("LIVE-PEER");
      livePeerServer.start();

      Wait.waitFor(livePeerServer::isStarted);

      waitForTopology(liveServer, 1, 1, 30000);
      waitForTopology(livePeerServer, 1, 1, 30000);

      liveServer.stop();

      // livePeerServer will take over and run un-replicated

      csf = locator.createSessionFactory();
      receiveFrom(csf, "live_un_replicated");
      sendTo(csf, "peer_un_replicated");
      csf.close();

      waitForTopology(livePeerServer, 1, 0, 30000);

      assertTrue(Wait.waitFor(() -> 2L == livePeerServer.getNodeManager().getNodeActivationSequence()));

      livePeerServer.stop(false);

      liveServer.start();

      Wait.assertTrue(() -> !liveServer.isActive());

      // restart the backup
      livePeerServer.start();

      Wait.waitFor(livePeerServer::isStarted);

      assertEquals(3L, livePeerServer.getNodeManager().getNodeActivationSequence());

      csf = locator.createSessionFactory();
      receiveFrom(csf, "peer_un_replicated");
      sendTo(csf, "backup_as_un_replicated");
      csf.close();

      // verify the live restarts as a backup to the restarted livePeerServer that has taken on the live role
      liveServer.start();

      csf = locator.createSessionFactory();
      receiveFrom(csf, "backup_as_un_replicated");
      sendTo(csf, "backup_as_replicated");
      csf.close();

      assertTrue(Wait.waitFor(liveServer::isReplicaSync));
      assertTrue(Wait.waitFor(() -> 3L == liveServer.getNodeManager().getNodeActivationSequence()));

      waitForTopology(liveServer, 1, 1, 30000);
      waitForTopology(livePeerServer, 1, 1, 30000);

      livePeerServer.stop(true);

      assertTrue(Wait.waitFor(() -> 4L == liveServer.getNodeManager().getNodeActivationSequence()));

      csf = locator.createSessionFactory();
      receiveFrom(csf, "backup_as_replicated");
      csf.close();

      waitForTopology(liveServer, 1, 0, 30000);

      liveServer.stop(true);
      keepLocatorAliveSLF.close();
      locator.close();
   }

   @Test
   public void testUnavailableSelfHeal() throws Exception {

      // start backup
      Configuration backupConfiguration = createBackupConfiguration();
      ActiveMQServer backupServer = addServer(ActiveMQServers.newActiveMQServer(backupConfiguration));
      backupServer.setIdentity("BACKUP");
      backupServer.start();

      // start live
      final Configuration liveConfiguration = createLiveConfiguration();

      ActiveMQServer liveServer = addServer(ActiveMQServers.newActiveMQServer(liveConfiguration));
      liveServer.setIdentity("LIVE");
      liveServer.start();

      Wait.waitFor(liveServer::isStarted);

      assertTrue(Wait.waitFor(backupServer::isStarted));
      assertTrue(Wait.waitFor(backupServer::isReplicaSync));
      assertTrue(liveServer.isReplicaSync());

      final String coordinatedId = liveServer.getNodeID().toString();

      backupServer.stop();
      TimeUnit.MILLISECONDS.sleep(500);
      liveServer.stop();

      // some manual intervention to force an unavailable state:
      // simulate the live failing to update its local activation sequence on the
      // un-replicated run that follows the backup stopping
      DistributedPrimitiveManager distributedPrimitiveManager = DistributedPrimitiveManager.newInstanceOf(managerConfiguration.getClassName(), managerConfiguration.getProperties());
      distributedPrimitiveManager.start();
      final MutableLong activationSequence = distributedPrimitiveManager.getMutableLong(coordinatedId);
      Assert.assertTrue(activationSequence.compareAndSet(2, -2));

      // case 1: the live failed to write 2 locally but the coordinated write actually succeeded;
      // activation should delay pending resolution of the uncommitted claim
      backupServer.start();

      // the live server should activate after self-healing its outstanding claim
      liveServer.start();
      Wait.waitFor(liveServer::isStarted);

      assertTrue(Wait.waitFor(backupServer::isReplicaSync));
      assertTrue(liveServer.isReplicaSync());
   }

   @Test
   public void testUnavailableAdminIntervention() throws Exception {
      // start backup
      Configuration backupConfiguration = createBackupConfiguration();
      ActiveMQServer backupServer = addServer(ActiveMQServers.newActiveMQServer(backupConfiguration));
      backupServer.setIdentity("BACKUP");
      backupServer.start();

      // start live
      final Configuration liveConfiguration = createLiveConfiguration();

      ActiveMQServer liveServer = addServer(ActiveMQServers.newActiveMQServer(liveConfiguration));
      liveServer.setIdentity("LIVE");
      liveServer.start();

      Wait.waitFor(liveServer::isStarted);

      assertTrue(Wait.waitFor(backupServer::isStarted));
      assertTrue(Wait.waitFor(backupServer::isReplicaSync));
      assertTrue(liveServer.isReplicaSync());

      final String coordinatedId = liveServer.getNodeID().toString();

      System.err.println("coordinated id: " + coordinatedId);
      backupServer.stop();
      TimeUnit.MILLISECONDS.sleep(500);
      liveServer.stop();

      // some manual intervention to force an unavailable state:
      // simulate the live failing to update its local activation sequence on the
      // un-replicated run that follows the backup stopping
      DistributedPrimitiveManager distributedPrimitiveManager = DistributedPrimitiveManager.newInstanceOf(
         managerConfiguration.getClassName(),
         managerConfiguration.getProperties());
      distributedPrimitiveManager.start();
      final MutableLong coordinatedActivationSequence = distributedPrimitiveManager.getMutableLong(coordinatedId);
      Assert.assertTrue(coordinatedActivationSequence.compareAndSet(2, -2));

      // case 2: the live failed to write 2 locally and the coordinated write failed too;
      // we need to put 1 in the local activation sequence of the live
      FileLockNodeManager fileLockNodeManager = new FileLockNodeManager(liveConfiguration.getNodeManagerLockLocation().getAbsoluteFile(), true);
      fileLockNodeManager.start();
      assertEquals(2, fileLockNodeManager.readNodeActivationSequence());
      fileLockNodeManager.writeNodeActivationSequence(1);
      fileLockNodeManager.stop();

      // should delay pending resolution of the uncommitted claim
      backupServer.start();
      CountDownLatch liveStarting = new CountDownLatch(1);
      // IMPORTANT: primary activation runs on the start caller thread!! We need another thread here
      final Thread liveServerStarterThread = new Thread(() -> {
         liveStarting.countDown();
         try {
            liveServer.start();
         } catch (Throwable e) {
            e.printStackTrace();
         }
      });
      liveServerStarterThread.start();
      liveStarting.await();
      TimeUnit.MILLISECONDS.sleep(500);
      // both are candidates and one of them failed to commit the claim;
      // let them compete on retry
      Assert.assertTrue(coordinatedActivationSequence.compareAndSet(-2, 1));
      // one of the two can activate
      Wait.waitFor(() -> liveServer.isStarted() || backupServer.isStarted());

      assertTrue(Wait.waitFor(backupServer::isReplicaSync));
      assertTrue(liveServer.isReplicaSync());

      assertEquals(2, backupServer.getNodeManager().getNodeActivationSequence());
      assertEquals(2, liveServer.getNodeManager().getNodeActivationSequence());
   }

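The admin step above is, at bottom, a single compare-and-set on the coordinated value. A toy model of that intervention, mirroring the MutableLong calls in the test (the AtomicLong stand-in and the interpretation of -2 are assumptions):

import java.util.concurrent.atomic.AtomicLong;

final class AdminInterventionModel {
   public static void main(String[] args) {
      AtomicLong coordinated = new AtomicLong(-2); // stuck on an uncommitted claim of 2
      // neither candidate's local sequence matches the claim, so an admin
      // rolls the coordinated value back to the last committed sequence
      boolean repaired = coordinated.compareAndSet(-2, 1);
      System.out.println(repaired + " -> " + coordinated.get()); // true -> 1
   }
}
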
   private void sendTo(ClientSessionFactory clientSessionFactory, String addr) throws Exception {
      ClientSession clientSession = clientSessionFactory.createSession(true, true);
      clientSession.createQueue(new QueueConfiguration(addr).setRoutingType(RoutingType.ANYCAST).setDurable(true));
      ClientProducer producer = clientSession.createProducer(addr);
      ClientMessage message = clientSession.createMessage(true);
      message.putStringProperty("K", addr);
      message.putLongProperty("delay", 0L); // so the slow interceptor does not get us
      producer.send(message);
      producer.close();
      clientSession.close();
   }

   private void receiveFrom(ClientSessionFactory clientSessionFactory, String addr) throws Exception {
      ClientSession clientSession = clientSessionFactory.createSession(true, true);
      clientSession.start();
      ClientConsumer consumer = clientSession.createConsumer(addr);
      Message message = consumer.receive(4000);
      assertNotNull(message);
      assertTrue(message.getStringProperty("K").equals(addr));
      consumer.close();
      clientSession.close();
   }
}

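For orientation, this is the same Core-client round trip the two helpers perform, pulled out as a standalone sketch. It only uses calls that already appear in this file; the address name and the HA URL are placeholders:

import org.apache.activemq.artemis.api.core.QueueConfiguration;
import org.apache.activemq.artemis.api.core.RoutingType;
import org.apache.activemq.artemis.api.core.client.*;
import org.apache.activemq.artemis.core.client.impl.ServerLocatorImpl;

public class SendReceiveRoundTrip {
   public static void main(String[] args) throws Exception {
      ServerLocator locator = ServerLocatorImpl.newLocator("(tcp://localhost:61616,tcp://localhost:61617)?ha=true");
      ClientSessionFactory csf = locator.createSessionFactory();
      ClientSession session = csf.createSession(true, true); // auto-commit sends and acks
      session.createQueue(new QueueConfiguration("example").setRoutingType(RoutingType.ANYCAST).setDurable(true));
      ClientProducer producer = session.createProducer("example");
      ClientMessage message = session.createMessage(true); // durable message
      message.putStringProperty("K", "example");
      producer.send(message);
      session.start();
      ClientConsumer consumer = session.createConsumer("example");
      System.out.println(consumer.receive(4000).getStringProperty("K")); // prints "example"
      session.close();
      csf.close();
      locator.close();
   }
}
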
@@ -244,7 +244,7 @@ public class SharedNothingReplicationTest extends ActiveMQTestBase {
         .setCheckForLiveServer(false);
   }

-  private Configuration createLiveConfiguration() throws Exception {
+  protected Configuration createLiveConfiguration() throws Exception {
      Configuration conf = new ConfigurationImpl();
      conf.setName("localhost::live");

@@ -275,7 +275,7 @@ public class SharedNothingReplicationTest extends ActiveMQTestBase {
      return new ReplicaPolicyConfiguration().setClusterName("cluster");
   }

-  private Configuration createBackupConfiguration() throws Exception {
+  protected Configuration createBackupConfiguration() throws Exception {
      Configuration conf = new ConfigurationImpl();
      conf.setName("localhost::backup");

@@ -24,7 +24,6 @@ import org.apache.activemq.artemis.api.core.client.ClientSession;
import org.apache.activemq.artemis.api.core.client.ClientSessionFactory;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
-import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
import org.apache.activemq.artemis.spi.core.security.ActiveMQBasicSecurityManager;
import org.apache.activemq.artemis.tests.integration.cluster.failover.FailoverTestBase;

@@ -66,9 +65,9 @@ public class PluggableQuorumBasicSecurityManagerFailoverTest extends FailoverTes
         .setLargeMessagesDirectory(getLargeMessagesDir(0, true));

      setupHAPolicyConfiguration();
-     nodeManager = createReplicatedBackupNodeManager(backupConfig);
+     backupNodeManager = createReplicatedBackupNodeManager(backupConfig);

-     backupServer = createTestableServer(backupConfig);
+     backupServer = createTestableServer(backupConfig, backupNodeManager);

      backupServer.getServer().setSecurityManager(new ActiveMQBasicSecurityManager());

@@ -77,7 +76,8 @@ public class PluggableQuorumBasicSecurityManagerFailoverTest extends FailoverTes
         .clearAcceptorConfigurations()
         .addAcceptorConfiguration(getAcceptorTransportConfiguration(true));

-     liveServer = createTestableServer(liveConfig);
+     nodeManager = createNodeManager(liveConfig);
+     liveServer = createTestableServer(liveConfig, nodeManager);

      liveServer.getServer().setSecurityManager(new ActiveMQBasicSecurityManager());
   }

@@ -94,7 +94,6 @@ public class PluggableQuorumBasicSecurityManagerFailoverTest extends FailoverTes
   @Override
   protected void setupHAPolicyConfiguration() {
-     ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
      ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
   }

@@ -677,6 +677,44 @@
               </args>
            </configuration>
         </execution>
+        <execution>
+           <phase>test-compile</phase>
+           <id>create-zk-replication-primary-peer-a</id>
+           <goals>
+              <goal>create</goal>
+           </goals>
+           <configuration>
+              <configuration>${basedir}/target/classes/servers/zkReplicationPrimaryPeerA</configuration>
+              <allowAnonymous>true</allowAnonymous>
+              <user>admin</user>
+              <password>admin</password>
+              <instance>${basedir}/target/zkReplicationPrimaryPeerA</instance>
+              <args>
+                 <!-- this is needed to run the server remotely -->
+                 <arg>--java-options</arg>
+                 <arg>-Djava.rmi.server.hostname=localhost</arg>
+              </args>
+           </configuration>
+        </execution>
+        <execution>
+           <phase>test-compile</phase>
+           <id>create-zk-replication-primary-peer-b</id>
+           <goals>
+              <goal>create</goal>
+           </goals>
+           <configuration>
+              <configuration>${basedir}/target/classes/servers/zkReplicationPrimaryPeerB</configuration>
+              <allowAnonymous>true</allowAnonymous>
+              <user>admin</user>
+              <password>admin</password>
+              <instance>${basedir}/target/zkReplicationPrimaryPeerB</instance>
+              <args>
+                 <!-- this is needed to run the server remotely -->
+                 <arg>--java-options</arg>
+                 <arg>-Djava.rmi.server.hostname=localhost</arg>
+              </args>
+           </configuration>
+        </execution>
         <execution>
            <phase>test-compile</phase>
            <id>create-zk-replication-backup</id>

@@ -39,7 +39,6 @@ under the License.
            <property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
         </properties>
      </manager>
-     <check-for-live-server>true</check-for-live-server>
   </primary>
</replication>
</ha-policy>

@@ -0,0 +1,138 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
--><configuration xmlns="urn:activemq" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:activemq /schema/artemis-server.xsd">

   <core xmlns="urn:activemq:core">

      <name>primary-peer-a</name>

      <bindings-directory>./data/bindings</bindings-directory>

      <journal-directory>./data/journal</journal-directory>

      <large-messages-directory>./data/largemessages</large-messages-directory>

      <paging-directory>./data/paging</paging-directory>

      <ha-policy>
         <replication>
            <primary>
               <manager>
                  <class-name>org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager</class-name>
                  <properties>
                     <property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
                  </properties>
               </manager>
               <coordination-id>peer-journal-001</coordination-id>
            </primary>
         </replication>
      </ha-policy>

      <connectors>
         <!-- Connector used to be announced through cluster connections and notifications -->
         <connector name="artemis">tcp://localhost:61616</connector>
         <connector name="peer">tcp://localhost:61716</connector>
      </connectors>

      <!-- Acceptors -->
      <acceptors>
         <acceptor name="artemis">tcp://localhost:61616</acceptor>
      </acceptors>

      <cluster-user>admin</cluster-user>

      <cluster-password>password</cluster-password>

      <cluster-connections>
         <cluster-connection name="my-cluster">
            <connector-ref>artemis</connector-ref>
            <message-load-balancing>OFF</message-load-balancing>
            <max-hops>1</max-hops>
            <static-connectors>
               <connector-ref>peer</connector-ref>
            </static-connectors>
         </cluster-connection>
      </cluster-connections>

      <!-- Other config -->

      <security-settings>
         <!-- security for the example queue -->
         <security-setting match="#">
            <permission type="createNonDurableQueue" roles="amq, guest"/>
            <permission type="deleteNonDurableQueue" roles="amq, guest"/>
            <permission type="createDurableQueue" roles="amq, guest"/>
            <permission type="deleteDurableQueue" roles="amq, guest"/>
            <permission type="createAddress" roles="amq, guest"/>
            <permission type="deleteAddress" roles="amq, guest"/>
            <permission type="consume" roles="amq, guest"/>
            <permission type="browse" roles="amq, guest"/>
            <permission type="send" roles="amq, guest"/>
            <!-- we need this otherwise ./artemis data imp wouldn't work -->
            <permission type="manage" roles="amq"/>
         </security-setting>
      </security-settings>

      <address-settings>
         <!-- if you define auto-create on certain queues, management has to be auto-create -->
         <address-setting match="activemq.management#">
            <dead-letter-address>DLQ</dead-letter-address>
            <expiry-address>ExpiryQueue</expiry-address>
            <redelivery-delay>0</redelivery-delay>
            <!-- with -1 only the global-max-size is in use for limiting -->
            <max-size-bytes>-1</max-size-bytes>
            <message-counter-history-day-limit>10</message-counter-history-day-limit>
            <address-full-policy>PAGE</address-full-policy>
            <auto-create-queues>true</auto-create-queues>
            <auto-create-addresses>true</auto-create-addresses>
            <auto-create-jms-queues>true</auto-create-jms-queues>
            <auto-create-jms-topics>true</auto-create-jms-topics>
         </address-setting>
         <!-- default for catch all -->
         <address-setting match="#">
            <dead-letter-address>DLQ</dead-letter-address>
            <expiry-address>ExpiryQueue</expiry-address>
            <redelivery-delay>0</redelivery-delay>
            <max-size-bytes>10MB</max-size-bytes>
            <page-size-bytes>1MB</page-size-bytes>
            <message-counter-history-day-limit>10</message-counter-history-day-limit>
            <address-full-policy>PAGE</address-full-policy>
            <auto-create-queues>true</auto-create-queues>
            <auto-create-addresses>true</auto-create-addresses>
            <auto-create-jms-queues>true</auto-create-jms-queues>
            <auto-create-jms-topics>true</auto-create-jms-topics>
         </address-setting>
      </address-settings>

      <addresses>
         <address name="exampleTopic">
            <multicast>
            </multicast>
         </address>
         <address name="exampleQueue">
            <anycast>
               <queue name="exampleQueue"/>
            </anycast>
         </address>
      </addresses>
   </core>
</configuration>

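The <coordination-id> shared by this file and its peer-b counterpart is what makes the two brokers a peer pair. A hedged sketch of the programmatic equivalent, using the setCoordinationId and setHAPolicyConfiguration calls that this commit's tests already use (the withDefault() factory and the elided distributed-manager setup are assumptions):

import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;

final class PeerPolicySketch {
   // distributed-manager configuration is elided; both peers must share the id
   static void makePeer(Configuration configuration, String sharedCoordinationId) {
      ReplicationPrimaryPolicyConfiguration policy = ReplicationPrimaryPolicyConfiguration.withDefault();
      policy.setCoordinationId(sharedCoordinationId);
      configuration.setHAPolicyConfiguration(policy);
   }
}
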
@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~     http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<management-context xmlns="http://activemq.org/schema">
   <connector connector-port="10099" connector-host="localhost"/>
</management-context>

@@ -0,0 +1,138 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
--><configuration xmlns="urn:activemq" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:activemq /schema/artemis-server.xsd">

   <core xmlns="urn:activemq:core">

      <name>primary-peer-b</name>

      <bindings-directory>./data/bindings</bindings-directory>

      <journal-directory>./data/journal</journal-directory>

      <large-messages-directory>./data/largemessages</large-messages-directory>

      <paging-directory>./data/paging</paging-directory>

      <ha-policy>
         <replication>
            <primary>
               <manager>
                  <class-name>org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager</class-name>
                  <properties>
                     <property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
                  </properties>
               </manager>
               <coordination-id>peer-journal-001</coordination-id>
            </primary>
         </replication>
      </ha-policy>

      <connectors>
         <!-- Connector used to be announced through cluster connections and notifications -->
         <connector name="artemis">tcp://localhost:61716</connector>
         <connector name="peer">tcp://localhost:61616</connector>
      </connectors>

      <!-- Acceptors -->
      <acceptors>
         <acceptor name="artemis">tcp://localhost:61716</acceptor>
      </acceptors>

      <cluster-user>admin</cluster-user>

      <cluster-password>password</cluster-password>

      <cluster-connections>
         <cluster-connection name="my-cluster">
            <connector-ref>artemis</connector-ref>
            <message-load-balancing>OFF</message-load-balancing>
            <max-hops>1</max-hops>
            <static-connectors>
               <connector-ref>peer</connector-ref>
            </static-connectors>
         </cluster-connection>
      </cluster-connections>

      <!-- Other config -->

      <security-settings>
         <!-- security for the example queue -->
         <security-setting match="#">
            <permission type="createNonDurableQueue" roles="amq, guest"/>
            <permission type="deleteNonDurableQueue" roles="amq, guest"/>
            <permission type="createDurableQueue" roles="amq, guest"/>
            <permission type="deleteDurableQueue" roles="amq, guest"/>
            <permission type="createAddress" roles="amq, guest"/>
            <permission type="deleteAddress" roles="amq, guest"/>
            <permission type="consume" roles="amq, guest"/>
            <permission type="browse" roles="amq, guest"/>
            <permission type="send" roles="amq, guest"/>
            <!-- we need this otherwise ./artemis data imp wouldn't work -->
            <permission type="manage" roles="amq"/>
         </security-setting>
      </security-settings>

      <address-settings>
         <!-- if you define auto-create on certain queues, management has to be auto-create -->
         <address-setting match="activemq.management#">
            <dead-letter-address>DLQ</dead-letter-address>
            <expiry-address>ExpiryQueue</expiry-address>
            <redelivery-delay>0</redelivery-delay>
            <!-- with -1 only the global-max-size is in use for limiting -->
            <max-size-bytes>-1</max-size-bytes>
            <message-counter-history-day-limit>10</message-counter-history-day-limit>
            <address-full-policy>PAGE</address-full-policy>
            <auto-create-queues>true</auto-create-queues>
            <auto-create-addresses>true</auto-create-addresses>
            <auto-create-jms-queues>true</auto-create-jms-queues>
            <auto-create-jms-topics>true</auto-create-jms-topics>
         </address-setting>
         <!-- default for catch all -->
         <address-setting match="#">
            <dead-letter-address>DLQ</dead-letter-address>
            <expiry-address>ExpiryQueue</expiry-address>
            <redelivery-delay>0</redelivery-delay>
            <max-size-bytes>10MB</max-size-bytes>
            <page-size-bytes>1MB</page-size-bytes>
            <message-counter-history-day-limit>10</message-counter-history-day-limit>
            <address-full-policy>PAGE</address-full-policy>
            <auto-create-queues>true</auto-create-queues>
            <auto-create-addresses>true</auto-create-addresses>
            <auto-create-jms-queues>true</auto-create-jms-queues>
            <auto-create-jms-topics>true</auto-create-jms-topics>
         </address-setting>
      </address-settings>

      <addresses>
         <address name="exampleTopic">
            <multicast>
            </multicast>
         </address>
         <address name="exampleQueue">
            <anycast>
               <queue name="exampleQueue"/>
            </anycast>
         </address>
      </addresses>
   </core>
</configuration>

@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~     http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<management-context xmlns="http://activemq.org/schema">
   <connector connector-port="10199" connector-host="localhost"/>
</management-context>

@@ -20,8 +20,7 @@ package org.apache.activemq.artemis.tests.smoke.quorum;
import javax.management.remote.JMXServiceURL;
import java.net.MalformedURLException;
import java.util.Arrays;
-import java.util.Collection;
+import java.util.LinkedList;
-import java.util.Collections;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.TimeUnit;

@@ -55,15 +54,15 @@ public abstract class PluggableQuorumSinglePairTest extends SmokeTestBase {

   private static final Logger LOGGER = Logger.getLogger(PluggableQuorumSinglePairTest.class);

-  private static final String JMX_SERVER_HOSTNAME = "localhost";
+  static final String JMX_SERVER_HOSTNAME = "localhost";
-  private static final int JMX_PORT_PRIMARY = 10099;
+  static final int JMX_PORT_PRIMARY = 10099;
-  private static final int JMX_PORT_BACKUP = 10199;
+  static final int JMX_PORT_BACKUP = 10199;

-  private static final String PRIMARY_DATA_FOLDER = "ReplicationPrimary";;
+  static final String PRIMARY_DATA_FOLDER = "ReplicationPrimary";
-  private static final String BACKUP_DATA_FOLDER = "ReplicationBackup";
+  static final String BACKUP_DATA_FOLDER = "ReplicationBackup";

-  private static final int PRIMARY_PORT_OFFSET = 0;
+  static final int PRIMARY_PORT_OFFSET = 0;
-  private static final int BACKUP_PORT_OFFSET = PRIMARY_PORT_OFFSET + 100;
+  static final int BACKUP_PORT_OFFSET = PRIMARY_PORT_OFFSET + 100;

   public static class BrokerControl {

@@ -73,7 +72,7 @@ public abstract class PluggableQuorumSinglePairTest extends SmokeTestBase {
      final JMXServiceURL jmxServiceURL;
      final int portID;

-     private BrokerControl(final String name, int jmxPort, String dataFolder, int portID) {
+     BrokerControl(final String name, int jmxPort, String dataFolder, int portID) {
         this.portID = portID;
         this.dataFolder = dataFolder;
         try {

@@ -108,6 +107,14 @@ public abstract class PluggableQuorumSinglePairTest extends SmokeTestBase {
      public Optional<String> listNetworkTopology() throws Exception {
         return Jmx.listNetworkTopology(jmxServiceURL, objectNameBuilder);
      }
+
+     public Optional<Long> getActivationSequence() throws Exception {
+        return Jmx.getActivationSequence(jmxServiceURL, objectNameBuilder);
+     }
+
+     public Optional<Boolean> isActive() throws Exception {
+        return Jmx.isActive(jmxServiceURL, objectNameBuilder);
+     }
   }

   @Parameterized.Parameter

@@ -118,14 +125,14 @@ public abstract class PluggableQuorumSinglePairTest extends SmokeTestBase {
      return Arrays.asList(new Object[][]{{false}, {true}});
   }

-  private final BrokerControl primary;
+  protected BrokerControl primary;
-  private final BrokerControl backup;
+  protected BrokerControl backup;
-  private final Collection<BrokerControl> brokers;
+  protected LinkedList<BrokerControl> brokers;

   public PluggableQuorumSinglePairTest(String brokerFolderPrefix) {
      primary = new BrokerControl("primary", JMX_PORT_PRIMARY, brokerFolderPrefix + PRIMARY_DATA_FOLDER, PRIMARY_PORT_OFFSET);
      backup = new BrokerControl("backup", JMX_PORT_BACKUP, brokerFolderPrefix + BACKUP_DATA_FOLDER, BACKUP_PORT_OFFSET);
-     brokers = Collections.unmodifiableList(Arrays.asList(primary, backup));
+     brokers = new LinkedList(Arrays.asList(primary, backup));
   }

   protected abstract boolean awaitAsyncSetupCompleted(long timeout, TimeUnit unit) throws InterruptedException;

@@ -150,6 +157,10 @@ public abstract class PluggableQuorumSinglePairTest extends SmokeTestBase {
      Process primaryInstance = primary.startServer(this, timeout);
      Assert.assertTrue(awaitAsyncSetupCompleted(timeout, TimeUnit.MILLISECONDS));
      Wait.assertTrue(() -> !primary.isBackup().orElse(true), timeout);
+
+     // primary UN-REPLICATED
+     Assert.assertEquals(1L, primary.getActivationSequence().get().longValue());
+
      LOGGER.info("started primary");
      LOGGER.info("starting backup");
      Process backupInstance = backup.startServer(this, 0);

@@ -176,6 +187,11 @@ public abstract class PluggableQuorumSinglePairTest extends SmokeTestBase {
      Assert.assertNotNull(urlPrimary);
      LOGGER.infof("primary: %s", urlPrimary);
      Assert.assertNotEquals(urlPrimary, urlBackup);
+
+     // primary REPLICATED, backup matches (has replicated) the activation sequence
+     Assert.assertEquals(1L, primary.getActivationSequence().get().longValue());
+     Assert.assertEquals(1L, backup.getActivationSequence().get().longValue());
+
      LOGGER.info("killing primary");
      ServerUtil.killServer(primaryInstance, forceKill);
      LOGGER.info("killed primary");

@@ -188,11 +204,15 @@ public abstract class PluggableQuorumSinglePairTest extends SmokeTestBase {
         .and(withNodes(1))), timeout);
      LOGGER.infof("backup topology is: %s", backup.listNetworkTopology().get());
      Assert.assertEquals(nodeID, backup.getNodeID().get());
+
+     // backup UN-REPLICATED (new version)
+     Assert.assertEquals(2L, backup.getActivationSequence().get().longValue());
+
      // wait a bit before restarting primary
      LOGGER.info("waiting before starting primary");
      TimeUnit.SECONDS.sleep(4);
      LOGGER.info("starting primary");
-     primary.startServer(this, 0);
+     primaryInstance = primary.startServer(this, 0);
      LOGGER.info("started primary");
      Wait.assertTrue(() -> backup.isBackup().orElse(false), timeout);
      Assert.assertTrue(!primary.isBackup().get());

@@ -209,6 +229,14 @@ public abstract class PluggableQuorumSinglePairTest extends SmokeTestBase {
      Assert.assertTrue(backup.isReplicaSync().get());
      LOGGER.infof("backup is synchronized with live");
      Assert.assertEquals(nodeID, primary.getNodeID().get());
+
+     // the primary ran un-replicated for a short while after failback, before the backup was in sync
+     Assert.assertEquals(3L, primary.getActivationSequence().get().longValue());
+     Assert.assertEquals(3L, backup.getActivationSequence().get().longValue());
+
+     LOGGER.infof("Done, killing both");
+     ServerUtil.killServer(primaryInstance);
+     ServerUtil.killServer(backupInstance);
   }

   @Test

@@ -272,5 +300,87 @@ public abstract class PluggableQuorumSinglePairTest extends SmokeTestBase {
      Wait.waitFor(()-> !backupInstance.isAlive(), timeout);
   }
+
+  @Test
+  public void testOnlyLastUnreplicatedCanStart() throws Exception {
+     final int timeout = (int) TimeUnit.SECONDS.toMillis(30);
+     LOGGER.info("starting primary");
+     Process primaryInstance = primary.startServer(this, timeout);
+     Assert.assertTrue(awaitAsyncSetupCompleted(timeout, TimeUnit.MILLISECONDS));
+     Wait.assertTrue(() -> !primary.isBackup().orElse(true), timeout);
+     LOGGER.info("started primary");
+     LOGGER.info("starting backup");
+     Process backupInstance = backup.startServer(this, 0);
+     Wait.assertTrue(() -> backup.isBackup().orElse(false), timeout);
+     final String nodeID = primary.getNodeID().get();
+     Assert.assertNotNull(nodeID);
+     LOGGER.infof("NodeID: %s", nodeID);
+     for (BrokerControl broker : brokers) {
+        Wait.assertTrue(() -> validateNetworkTopology(broker.listNetworkTopology().orElse(""),
+           containsExactNodeIds(nodeID)
+              .and(withLive(nodeID, Objects::nonNull))
+              .and(withBackup(nodeID, Objects::nonNull))
+              .and(withMembers(1))
+              .and(withNodes(2))), timeout);
+     }
+     LOGGER.infof("primary topology is: %s", primary.listNetworkTopology().get());
+     LOGGER.infof("backup topology is: %s", backup.listNetworkTopology().get());
+     Assert.assertTrue(backup.isReplicaSync().get());
+     LOGGER.infof("backup is synchronized with live");
+     final String urlBackup = backupOf(nodeID, decodeNetworkTopologyJson(backup.listNetworkTopology().get()));
+     Assert.assertNotNull(urlBackup);
+     LOGGER.infof("backup: %s", urlBackup);
+     final String urlPrimary = liveOf(nodeID, decodeNetworkTopologyJson(primary.listNetworkTopology().get()));
+     Assert.assertNotNull(urlPrimary);
+     LOGGER.infof("primary: %s", urlPrimary);
+     Assert.assertNotEquals(urlPrimary, urlBackup);
+
+     // verify the sequence ids are in sync
+     Assert.assertEquals(1L, primary.getActivationSequence().get().longValue());
+     Assert.assertEquals(1L, backup.getActivationSequence().get().longValue());
+
+     LOGGER.info("killing primary");
+     ServerUtil.killServer(primaryInstance, forceKill);
+     Wait.assertTrue(() -> !backup.isBackup().orElse(true), timeout);
+     Wait.assertTrue(() -> validateNetworkTopology(backup.listNetworkTopology().orElse(""),
+        containsExactNodeIds(nodeID)
+           .and(withLive(nodeID, urlBackup::equals))
+           .and(withBackup(nodeID, Objects::isNull))
+           .and(withMembers(1))
+           .and(withNodes(1))), timeout);
+     LOGGER.infof("backup topology is: %s", backup.listNetworkTopology().get());
+     Assert.assertEquals(nodeID, backup.getNodeID().get());
+
+     // the backup is now UN-REPLICATED; it is the only node that can continue
+     Assert.assertEquals(2L, backup.getActivationSequence().get().longValue());
+
+     LOGGER.info("killing backup");
+     ServerUtil.killServer(backupInstance, forceKill);
+
+     // wait a bit before restarting primary
+     LOGGER.info("waiting before starting primary");
+     TimeUnit.SECONDS.sleep(4);
+     LOGGER.info("restarting primary");
+
+     Process restartedPrimary = primary.startServer(this, 0);
+     LOGGER.info("restarted primary, " + restartedPrimary);
+
+     Wait.assertFalse("Primary shouldn't activate", () -> primary.isActive().orElse(false), 5000);
+
+     ServerUtil.killServer(restartedPrimary);
+
+     LOGGER.info("restarting backup");
+
+     // the backup can resume with data sequence 3
+     final Process restartedBackupInstance = backup.startServer(this, 5000);
+     Wait.waitFor(() -> backup.isActive().orElse(false), 5000);
+     assertTrue(Wait.waitFor(() -> nodeID.equals(backup.getNodeID().orElse("not set yet"))));
+     LOGGER.info("restarted backup");
+
+     Assert.assertEquals(3L, backup.getActivationSequence().get().longValue());
+  }
}

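The getActivationSequence and isActive accessors added to BrokerControl above pair naturally with the Wait utility these smoke tests already use. A sketch of that composition, assuming it lives in the same test package (the helper name and the 30-second timeout are illustrative):

import java.util.concurrent.TimeUnit;

import org.apache.activemq.artemis.utils.Wait;

final class ActivationSequenceAwait {
   // polls a broker's JMX-exposed activation sequence until it reaches the
   // expected value or the timeout elapses
   static void awaitSequence(PluggableQuorumSinglePairTest.BrokerControl broker, long expected) throws Exception {
      Wait.assertTrue(() -> expected == broker.getActivationSequence().orElse(Long.MIN_VALUE),
                      TimeUnit.SECONDS.toMillis(30));
   }
}
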
@ -0,0 +1,109 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.activemq.artemis.tests.smoke.quorum;

import java.util.Arrays;
import java.util.LinkedList;
import java.util.Objects;
import java.util.concurrent.TimeUnit;

import org.apache.activemq.artemis.util.ServerUtil;
import org.apache.activemq.artemis.utils.Wait;
import org.jboss.logging.Logger;
import org.junit.Assert;
import org.junit.Test;

import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.containsExactNodeIds;
import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.validateNetworkTopology;
import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.withBackup;
import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.withLive;
import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.withMembers;
import static org.apache.activemq.artemis.tests.smoke.utils.Jmx.withNodes;

public class ZookeeperPluggableQuorumPeerTest extends ZookeeperPluggableQuorumSinglePairTest {

   private static final Logger LOGGER = Logger.getLogger(ZookeeperPluggableQuorumPeerTest.class);

   public ZookeeperPluggableQuorumPeerTest() {
      super();
      // accepting the primary/backup vars to reuse the test, for peers, these are interchangeable as either can take
      // both roles as both wish to be primary but will revert to backup
      primary = new BrokerControl("primary-peer-a", JMX_PORT_PRIMARY, "zkReplicationPrimaryPeerA", PRIMARY_PORT_OFFSET);
      backup = new BrokerControl("primary-peer-b", JMX_PORT_BACKUP, "zkReplicationPrimaryPeerB", BACKUP_PORT_OFFSET);
      brokers = new LinkedList(Arrays.asList(primary, backup));
   }

   @Test
   @Override
   public void testBackupFailoverAndPrimaryFailback() throws Exception {
      // peers don't request fail back by default
      // just wait for setup to avoid partial stop of zk via fast tear down with async setup
      Wait.waitFor(this::ensembleHasLeader);
   }

   @Test
   public void testMultiPrimary_Peer() throws Exception {

      final int timeout = (int) TimeUnit.SECONDS.toMillis(30);
      LOGGER.info("starting peer b primary");

      Process backupInstance = backup.startServer(this, timeout);

      // alive as unreplicated, it has configured node id
      assertTrue(Wait.waitFor(() -> 1L == backup.getActivationSequence().orElse(Long.MAX_VALUE).longValue()));

      final String nodeID = backup.getNodeID().get();
      Assert.assertNotNull(nodeID);
      LOGGER.infof("NodeID: %s", nodeID);

      LOGGER.info("starting peer a primary");
      primary.startServer(this, 0);
      Wait.assertTrue(() -> primary.isBackup().orElse(false), timeout);

      Wait.assertTrue(() -> !backup.isBackup().orElse(true), timeout);

      for (BrokerControl broker : brokers) {
         Wait.assertTrue(() -> validateNetworkTopology(broker.listNetworkTopology().orElse(""),
                                                       containsExactNodeIds(nodeID)
                                                          .and(withLive(nodeID, Objects::nonNull))
                                                          .and(withBackup(nodeID, Objects::nonNull))
                                                          .and(withMembers(1))
                                                          .and(withNodes(2))), timeout);
      }

      LOGGER.infof("primary topology is: %s", primary.listNetworkTopology().get());
      LOGGER.infof("backup topology is: %s", backup.listNetworkTopology().get());
      Assert.assertTrue(backup.isReplicaSync().get());
      Assert.assertTrue(primary.isReplicaSync().get());

      LOGGER.info("killing peer-b");
      ServerUtil.killServer(backupInstance, forceKill);

      // peer-a now UNREPLICATED
      Wait.assertTrue(() -> 2L == primary.getActivationSequence().get().longValue());

      LOGGER.info("restarting peer-b");
      backup.startServer(this, 0);

      assertTrue(Wait.waitFor(() -> nodeID.equals(backup.getNodeID().orElse("not set yet"))));
      // peer-b now a REPLICA
      Wait.waitFor(() -> backup.isReplicaSync().get());
      Wait.assertTrue(() -> 2L == backup.getActivationSequence().get().longValue());
   }
}

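The two peers above are started from the zkReplicationPrimaryPeerA/B profiles, whose defining trait is that both brokers use the primary replication policy with the same coordination id, so either can own the shared journal identity. As rough orientation only, here is a programmatic sketch of such a peer policy — the setDistributedManagerConfiguration call and the DistributedPrimitiveManagerConfiguration type are assumed from the pluggable-quorum configuration API rather than taken from this diff, the manager class name follows the documented ZooKeeper implementation, and the connect string is a placeholder:

// Hedged sketch, not part of this commit; see assumptions in the lead-in above.
import java.util.Collections;

import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;

public final class PeerPolicySketch {

   public static ReplicationPrimaryPolicyConfiguration peerPolicy(String sharedCoordinationId) {
      ReplicationPrimaryPolicyConfiguration policy = ReplicationPrimaryPolicyConfiguration.withDefault();
      // the same coordination id on both peers lets either one own the journal identity
      policy.setCoordinationId(sharedCoordinationId);
      policy.setDistributedManagerConfiguration(new DistributedPrimitiveManagerConfiguration(
         "org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager",
         Collections.singletonMap("connect-string", "127.0.0.1:2181"))); // placeholder zk address
      return policy;
   }
}
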
@ -1,13 +1,13 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
- * <p>
+ *
  *     http://www.apache.org/licenses/LICENSE-2.0
- * <p>
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

@ -19,6 +19,8 @@ package org.apache.activemq.artemis.tests.smoke.quorum;
 
 import java.util.List;
 import java.util.concurrent.TimeUnit;
 
+import org.apache.activemq.artemis.utils.ThreadLeakCheckRule;
+
 import org.apache.curator.test.InstanceSpec;
 import org.apache.curator.test.TestingCluster;
 import org.apache.curator.test.TestingZooKeeperServer;

@ -38,7 +40,7 @@ public class ZookeeperPluggableQuorumSinglePairTest extends PluggableQuorumSingl
 
    @Rule
    public TemporaryFolder tmpFolder = new TemporaryFolder();
-   private TestingCluster testingServer;
+   protected TestingCluster testingServer;
    private InstanceSpec[] clusterSpecs;
    private int nodes;

@ -60,6 +62,8 @@ public class ZookeeperPluggableQuorumSinglePairTest extends PluggableQuorumSingl
    @Override
    @After
    public void after() throws Exception {
+      // zk bits that leak from servers
+      ThreadLeakCheckRule.addKownThread("ListenerHandler-");
       try {
          super.after();
       } finally {

@ -76,6 +80,16 @@ public class ZookeeperPluggableQuorumSinglePairTest extends PluggableQuorumSingl
       return true;
    }
 
+   protected boolean ensembleHasLeader() {
+      return testingServer.getServers().stream().filter(ZookeeperPluggableQuorumSinglePairTest::isLeader).count() != 0;
+   }
+
+   private static boolean isLeader(TestingZooKeeperServer server) {
+      long leaderId = server.getQuorumPeer().getLeaderId();
+      long id = server.getQuorumPeer().getId();
+      return id == leaderId;
+   }
+
    @Override
    protected void stopMajority() throws Exception {
       List<TestingZooKeeperServer> followers = testingServer.getServers();

@ -77,6 +77,16 @@ public class Jmx {
       return queryControl(serviceURI, builder.getActiveMQServerObjectName(), ActiveMQServerControl::getNodeID, ActiveMQServerControl.class, throwable -> null);
    }
 
+   public static Optional<Long> getActivationSequence(JMXServiceURL serviceURI, ObjectNameBuilder builder) throws Exception {
+      return queryControl(serviceURI, builder.getActiveMQServerObjectName(), ActiveMQServerControl::getActivationSequence, ActiveMQServerControl.class, throwable -> null);
+   }
+
+   public static Optional<Boolean> isActive(JMXServiceURL serviceURI, ObjectNameBuilder builder) throws Exception {
+      return queryControl(serviceURI, builder.getActiveMQServerObjectName(), ActiveMQServerControl::isActive, ActiveMQServerControl.class, throwable -> null);
+   }
+
    public static Optional<String> listNetworkTopology(JMXServiceURL serviceURI,
                                                       ObjectNameBuilder builder) throws Exception {
       return queryControl(serviceURI, builder.getActiveMQServerObjectName(), ActiveMQServerControl::listNetworkTopology, ActiveMQServerControl.class, throwable -> null);

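Like the surrounding helpers, the two new queries funnel through queryControl with a `throwable -> null` fallback, so an unreachable broker surfaces as an empty Optional rather than an exception. A hedged usage sketch follows — the JMX URL and the ObjectNameBuilder arguments are placeholders, and it assumes the class sits beside the Jmx helper in org.apache.activemq.artemis.tests.smoke.utils:

// Usage sketch, not part of this commit; see assumptions in the lead-in above.
import javax.management.remote.JMXServiceURL;

import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
import org.apache.activemq.artemis.api.core.management.ObjectNameBuilder;

public final class JmxQuerySketch {

   public static void main(String[] args) throws Exception {
      JMXServiceURL url = new JMXServiceURL("service:jmx:rmi:///jndi/rmi://localhost:1099/jmxrmi");
      ObjectNameBuilder builder = ObjectNameBuilder.create(
         ActiveMQDefaultConfiguration.getDefaultJmxDomain(), "0.0.0.0", true);
      // empty Optionals (defaulted here) mean the broker could not be queried
      System.out.println("activation sequence: " + Jmx.getActivationSequence(url, builder).orElse(-1L));
      System.out.println("active: " + Jmx.isActive(url, builder).orElse(false));
   }
}
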
@ -18,6 +18,7 @@ package org.apache.activemq.artemis.tests.unit.core.server.impl;
 
 import java.io.File;
 
+import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
 import org.apache.activemq.artemis.core.server.impl.FileLockNodeManager;
 import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
 import org.junit.Before;

@ -33,6 +34,25 @@ public class FileLockTest extends ActiveMQTestBase {
       file.mkdirs();
    }
 
+   @Test
+   public void testSetNodeID() throws Exception {
+      FileLockNodeManager underTest = new FileLockNodeManager(getTestDirfile(), false);
+      ReplicationPrimaryPolicyConfiguration replicationPrimaryPolicyConfiguration = ReplicationPrimaryPolicyConfiguration.withDefault();
+      String seed = "";
+      for (int i = 0; i < 20; i++) {
+         replicationPrimaryPolicyConfiguration.setCoordinationId(seed);
+         if (replicationPrimaryPolicyConfiguration.getCoordinationId() != null) {
+            underTest.setNodeID(replicationPrimaryPolicyConfiguration.getCoordinationId());
+         }
+         seed += String.valueOf(i);
+      }
+
+      replicationPrimaryPolicyConfiguration.setCoordinationId("somme-dash-and-odd");
+      if (replicationPrimaryPolicyConfiguration.getCoordinationId() != null) {
+         underTest.setNodeID(replicationPrimaryPolicyConfiguration.getCoordinationId());
+      }
+   }
+
    @Test
    public void testNIOLock() throws Exception {
       doTestLock(new FileLockNodeManager(getTestDirfile(), false), new FileLockNodeManager(getTestDirfile(), false));

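The null guard in testSetNodeID implies that not every seed yields a usable coordination id — the initial empty string, in particular — while any non-null result must be accepted by FileLockNodeManager as a node ID. A small round-trip sketch of that contract (the output format is illustrative; the normalization itself is left to the configuration class):

// Round-trip sketch, not part of this commit; see assumptions in the lead-in above.
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;

public final class CoordinationIdSketch {

   public static void main(String[] args) {
      ReplicationPrimaryPolicyConfiguration cfg = ReplicationPrimaryPolicyConfiguration.withDefault();
      for (String seed : new String[]{"", "0123", "somme-dash-and-odd"}) {
         cfg.setCoordinationId(seed);
         // a null result marks an unusable seed; anything else is a valid node id
         System.out.println("seed '" + seed + "' -> " + cfg.getCoordinationId());
      }
   }
}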