This commit is contained in:
Clebert Suconic 2021-08-05 14:18:39 -04:00
commit c9f001215f
138 changed files with 10476 additions and 352 deletions

View File

@ -19,7 +19,7 @@ package org.apache.activemq.artemis.cli.commands;
import java.io.File;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import io.airlift.airline.Command;
import io.airlift.airline.Option;
@ -71,6 +71,7 @@ public class Run extends LockAbstract {
public Object execute(ActionContext context) throws Exception {
super.execute(context);
AtomicReference<Throwable> serverActivationFailed = new AtomicReference<>();
try {
BrokerDTO broker = getBrokerDTO();
ActiveMQSecurityManager securityManager = SecurityManagerFactory.create(broker.security);
@ -110,8 +111,7 @@ public class Run extends LockAbstract {
server = BrokerFactory.createServer(broker.server, securityManager, activateCallback);
server.createComponents();
AtomicBoolean serverActivationFailed = new AtomicBoolean(false);
server.getServer().registerActivationFailureListener(exception -> serverActivationFailed.set(true));
server.getServer().registerActivationFailureListener(exception -> serverActivationFailed.set(exception));
server.start();
server.getServer().addExternalComponent(managementContext, false);
@ -126,14 +126,16 @@ public class Run extends LockAbstract {
server.getServer().addExternalComponent(component, true);
assert component.isStarted();
}
if (serverActivationFailed.get()) {
stop();
}
} catch (Throwable t) {
t.printStackTrace();
stop();
serverActivationFailed.set(t);
}
if (serverActivationFailed.get() != null) {
stop();
return serverActivationFailed.get();
}
return new Pair<>(managementContext, server.getServer());
}

View File

@ -2851,4 +2851,12 @@ public interface AuditLogger extends BasicLogger {
@LogMessage(level = Logger.Level.INFO)
@Message(id = 601748, value = "User {0} is getting max retry interval on target resource: {1} {2}", format = Message.Format.MESSAGE_FORMAT)
void getMaxRetryInterval(String user, Object source, Object... args);
// Convenience entry point: captures the calling user via getCaller() and records
// the id-601748/601749 style audit message for the activation-sequence read.
static void getActivationSequence(Object source) {
   BASE_LOGGER.getActivationSequence(getCaller(), source);
}
@LogMessage(level = Logger.Level.INFO)
@Message(id = 601749, value = "User {0} is getting activation sequence on target resource: {1} {2}", format = Message.Format.MESSAGE_FORMAT)
void getActivationSequence(String user, Object source, Object... args);
}

View File

@ -270,6 +270,18 @@ public class ThreadLeakCheckRule extends TestWatcher {
} else if (threadName.contains("ObjectCleanerThread")) {
// Required since upgrade to Netty 4.1.22 maybe because https://github.com/netty/netty/commit/739e70398ccb6b11ffa97c6b5f8d55e455a2165e
return true;
} else if (threadName.contains("RMI TCP")) {
return true;
} else if (threadName.contains("RMI Scheduler")) {
return true;
} else if (threadName.contains("RMI RenewClean")) {
return true;
} else if (threadName.contains("Signal Dispatcher")) {
return true;
} else if (threadName.contains("ForkJoinPool.commonPool")) {
return true;
} else if (threadName.contains("GC Daemon")) {
return true;
} else {
for (StackTraceElement element : thread.getStackTrace()) {
if (element.getClassName().contains("org.jboss.byteman.agent.TransformListener")) {

View File

@ -264,6 +264,9 @@ public final class ActiveMQDefaultConfiguration {
// the directory to store the journal files in
private static String DEFAULT_JOURNAL_DIR = "data/journal";
// the directory to store the data files in
private static String DEFAULT_DATA_DIR = "data";
// true means that the journal directory will be created
private static boolean DEFAULT_CREATE_JOURNAL_DIR = true;
@ -627,6 +630,8 @@ public final class ActiveMQDefaultConfiguration {
public static final String DEFAULT_TEMPORARY_QUEUE_NAMESPACE = "";
private static final String DEFAULT_DISTRIBUTED_PRIMITIVE_MANAGER_CLASS_NAME = "org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager";
// Number of concurrent workers for a core bridge
public static int DEFAULT_BRIDGE_CONCURRENCY = 1;
@ -938,6 +943,13 @@ public final class ActiveMQDefaultConfiguration {
return DEFAULT_JOURNAL_DIR;
}
/**
 * the directory to store the data files in
 */
public static String getDefaultDataDir() {
   return DEFAULT_DATA_DIR;
}
/**
* true means that the journal directory will be created
*/
@ -1721,6 +1733,10 @@ public final class ActiveMQDefaultConfiguration {
return DEFAULT_TEMPORARY_QUEUE_NAMESPACE;
}
/**
 * Default implementation class name of the distributed primitive manager
 * (the Curator/ZooKeeper based manager)
 */
public static String getDefaultDistributedPrimitiveManagerClassName() {
   return DEFAULT_DISTRIBUTED_PRIMITIVE_MANAGER_CLASS_NAME;
}
/**
 * Number of concurrent workers for a core bridge
 */
public static int getDefaultBridgeConcurrency() {
   return DEFAULT_BRIDGE_CONCURRENCY;
}

View File

@ -321,6 +321,15 @@ public interface ActiveMQServerControl {
@Attribute(desc = "Node ID of this server")
String getNodeID();
/**
* Returns the current activation sequence number of this server.
* <br>
* When replicated, peers may coordinate activation with this monotonic sequence
*/
@Attribute(desc = "Activation sequence of this server instance")
long getActivationSequence();
/**
* Returns the management notification address of this server.
* <br>

View File

@ -231,6 +231,17 @@
<version>${project.version}</version>
<classifier>javadoc</classifier>
</dependency>
<!-- quorum -->
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-quorum-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-quorum-ri</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>

View File

@ -62,6 +62,9 @@
<include>org.apache.activemq.rest:artemis-rest</include>
<include>org.apache.qpid:qpid-jms-client</include>
<include>io.micrometer:micrometer-core</include>
<!-- quorum -->
<include>org.apache.activemq:artemis-quorum-api</include>
<include>org.apache.activemq:artemis-quorum-ri</include>
<!-- dependencies -->
<include>jakarta.jms:jakarta.jms-api</include>
@ -97,6 +100,12 @@
<include>com.sun.xml.bind:jaxb-impl</include>
<include>jakarta.activation:jakarta.activation-api</include>
<include>jakarta.security.auth.message:jakarta.security.auth.message-api</include>
<!-- quorum -->
<include>org.apache.curator:curator-recipes</include>
<include>org.apache.curator:curator-client</include>
<include>org.apache.curator:curator-framework</include>
<include>org.apache.zookeeper:zookeeper</include>
<include>org.apache.zookeeper:zookeeper-jute</include>
</includes>
<!--excludes>
<exclude>org.apache.activemq:artemis-website</exclude>

View File

@ -81,6 +81,7 @@
<!--bundle dependency="true">mvn:io.micrometer/micrometer-core/${version.micrometer}</bundle-->
<bundle>mvn:org.apache.activemq/activemq-artemis-native/${activemq-artemis-native-version}</bundle>
<bundle>mvn:org.apache.activemq/artemis-quorum-api/${pom.version}</bundle>
<bundle>mvn:org.apache.activemq/artemis-server-osgi/${pom.version}</bundle>
</feature>

View File

@ -0,0 +1,41 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-pom</artifactId>
<version>2.18.0-SNAPSHOT</version>
</parent>
<artifactId>artemis-quorum-api</artifactId>
<packaging>bundle</packaging>
<name>ActiveMQ Artemis Quorum API</name>
<properties>
<activemq.basedir>${project.basedir}/..</activemq.basedir>
</properties>
<dependencies>
<dependency>
<groupId>com.google.errorprone</groupId>
<artifactId>error_prone_core</artifactId>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.LockSupport;
/**
 * A lock whose ownership is coordinated among processes by a
 * {@code DistributedPrimitiveManager} implementation.
 */
public interface DistributedLock extends AutoCloseable {

   /**
    * Returns the id this lock was requested under from its manager.
    */
   String getLockId();

   /**
    * Returns {@code true} if the caller currently holds this lock.
    *
    * @throws UnavailableStateException if the backing state cannot be queried reliably
    */
   boolean isHeldByCaller() throws UnavailableStateException;

   /**
    * Single non-blocking acquisition attempt.
    *
    * @return {@code true} if the lock was acquired, {@code false} otherwise
    * @throws UnavailableStateException if the backing state cannot be accessed reliably
    * @throws InterruptedException if the calling thread is interrupted while acquiring
    */
   boolean tryLock() throws UnavailableStateException, InterruptedException;

   /**
    * Polls {@link #tryLock()} until it succeeds, the timeout elapses or the calling
    * thread is interrupted. A {@code timeout} of {@code 0} degenerates to a single
    * non-blocking attempt.
    *
    * @throws IllegalArgumentException if {@code timeout} is negative
    * @throws InterruptedException if the calling thread is (or becomes) interrupted
    */
   default boolean tryLock(long timeout, TimeUnit unit) throws UnavailableStateException, InterruptedException {
      // it doesn't make sense to be super fast: retry roughly every 250 ms
      final long TARGET_FIRE_PERIOD_NS = TimeUnit.MILLISECONDS.toNanos(250);
      if (timeout < 0) {
         throw new IllegalArgumentException("timeout cannot be negative");
      }
      Objects.requireNonNull(unit);
      if (timeout == 0) {
         return tryLock();
      }
      final Thread currentThread = Thread.currentThread();
      final long timeoutNs = unit.toNanos(timeout);
      final long start = System.nanoTime();
      final long deadline = start + timeoutNs;
      long expectedNextFireTime = start;
      while (!currentThread.isInterrupted()) {
         // park until the next scheduled attempt, bailing out on interrupt
         long parkNs = expectedNextFireTime - System.nanoTime();
         while (parkNs > 0) {
            LockSupport.parkNanos(parkNs);
            if (currentThread.isInterrupted()) {
               throw new InterruptedException();
            }
            final long now = System.nanoTime();
            parkNs = expectedNextFireTime - now;
         }
         if (tryLock()) {
            return true;
         }
         final long now = System.nanoTime();
         final long remainingTime = deadline - now;
         if (remainingTime <= 0) {
            return false;
         }
         if (remainingTime < TARGET_FIRE_PERIOD_NS) {
            // less than a full period left: schedule the last attempt immediately
            expectedNextFireTime = now;
         } else {
            expectedNextFireTime += TARGET_FIRE_PERIOD_NS;
         }
      }
      throw new InterruptedException();
   }

   /**
    * Releases the lock.
    *
    * @throws UnavailableStateException if the backing state cannot be accessed reliably
    */
   void unlock() throws UnavailableStateException;

   /**
    * Registers a listener notified when this lock becomes unavailable.
    */
   void addListener(UnavailableLockListener listener);

   /**
    * Removes a listener previously registered with {@link #addListener(UnavailableLockListener)}.
    */
   void removeListener(UnavailableLockListener listener);

   /**
    * Callback fired when the lock can no longer be assumed held by the caller.
    */
   @FunctionalInterface
   interface UnavailableLockListener {

      void onUnavailableLockEvent();
   }

   /**
    * Unlike {@link AutoCloseable#close()}, implementations are not expected to throw.
    */
   @Override
   void close();
}

View File

@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
/**
 * Factory/lifecycle manager for distributed primitives ({@link DistributedLock},
 * {@link MutableLong}) shared among processes.
 */
public interface DistributedPrimitiveManager extends AutoCloseable {

   /**
    * Reflectively instantiates a manager: {@code className} must declare a constructor
    * taking a single {@code Map} of configuration properties.
    */
   static DistributedPrimitiveManager newInstanceOf(String className, Map<String, String> properties) throws Exception {
      return (DistributedPrimitiveManager) Class.forName(className).getDeclaredConstructor(Map.class).newInstance(properties);
   }

   /**
    * Callback fired when the manager becomes unavailable.
    */
   @FunctionalInterface
   interface UnavailableManagerListener {

      void onUnavailableManagerEvent();
   }

   void addUnavailableManagerListener(UnavailableManagerListener listener);

   void removeUnavailableManagerListener(UnavailableManagerListener listener);

   /**
    * Starts the manager; implementations may wait up to the given timeout to become
    * operational.
    *
    * @return {@code true} if started within the timeout
    */
   boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException;

   /**
    * Starts the manager without a time bound.
    */
   void start() throws InterruptedException, ExecutionException;

   boolean isStarted();

   void stop();

   /**
    * Returns the lock registered under {@code lockId} (implementations may create it
    * on first request).
    */
   DistributedLock getDistributedLock(String lockId) throws InterruptedException, ExecutionException, TimeoutException;

   /**
    * Returns the shared long registered under {@code mutableLongId}.
    */
   MutableLong getMutableLong(String mutableLongId) throws InterruptedException, ExecutionException, TimeoutException;

   /**
    * Equivalent to {@link #stop()}.
    */
   @Override
   default void close() {
      stop();
   }
}

View File

@ -0,0 +1,51 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.activemq.artemis.quorum;
/**
 * A mutable {@code long} value shared among processes through a
 * {@code DistributedPrimitiveManager}.
 */
public interface MutableLong extends AutoCloseable {

   /**
    * Returns the id this value was requested under from its manager.
    */
   String getMutableLongId();

   /**
    * Reads the current value.
    *
    * @throws UnavailableStateException if the backing state cannot be read reliably
    */
   long get() throws UnavailableStateException;

   /**
    * Overwrites the current value.
    *
    * @throws UnavailableStateException if the backing state cannot be written reliably
    */
   void set(long value) throws UnavailableStateException;

   /**
    * This is not meant to be atomic; it's semantically equivalent to:
    * <pre>
    * long oldValue = mutableLong.get();
    * if (oldValue != expectedValue) {
    *    return false;
    * }
    * mutableLong.set(newValue);
    * return true;
    * </pre>
    */
   default boolean compareAndSet(long expectedValue, long newValue) throws UnavailableStateException {
      final long oldValue = get();
      if (oldValue != expectedValue) {
         return false;
      }
      set(newValue);
      return true;
   }

   /**
    * Unlike {@link AutoCloseable#close()}, implementations are not expected to throw.
    */
   @Override
   void close();
}

View File

@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum;
/**
 * Checked exception raised by the quorum primitives when the backing shared state
 * cannot be read or written reliably, so no trustworthy answer can be produced.
 */
public final class UnavailableStateException extends Exception {

   /** No detail message and no cause. */
   public UnavailableStateException() {
   }

   /** Carries a detail message only. */
   public UnavailableStateException(String message) {
      super(message);
   }

   /** Carries both a detail message and the originating cause. */
   public UnavailableStateException(String message, Throwable cause) {
      super(message, cause);
   }

   /** Wraps the originating cause without an explicit message. */
   public UnavailableStateException(Throwable cause) {
      super(cause);
   }
}

134
artemis-quorum-ri/pom.xml Normal file
View File

@ -0,0 +1,134 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-pom</artifactId>
<version>2.18.0-SNAPSHOT</version>
</parent>
<artifactId>artemis-quorum-ri</artifactId>
<packaging>jar</packaging>
<name>ActiveMQ Artemis Quorum RI</name>
<properties>
<activemq.basedir>${project.basedir}/..</activemq.basedir>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-client</artifactId>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-test</artifactId>
<version>${curator.version}</version>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-quorum-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.logging</groupId>
<artifactId>jboss-logging</artifactId>
</dependency>
<dependency>
<groupId>org.jboss.slf4j</groupId>
<artifactId>slf4j-jboss-logmanager</artifactId>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-commons</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.google.errorprone</groupId>
<artifactId>error_prone_core</artifactId>
</dependency>
<!-- tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest</artifactId>
<version>${hamcrest.version}</version>
<scope>test</scope>
</dependency>
<!-- test logging -->
<dependency>
<groupId>org.jboss.logging</groupId>
<artifactId>jboss-logging-processor</artifactId>
<scope>provided</scope>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.jboss.logmanager</groupId>
<artifactId>jboss-logmanager</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.wildfly.common</groupId>
<artifactId>wildfly-common</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-commons</artifactId>
<version>${project.version}</version>
<scope>test</scope>
<type>test-jar</type>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<phase>test</phase>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,183 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.file;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.MutableLong;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
/**
* This is an implementation suitable to be used just on unit tests and it won't attempt
* to manage nor purge existing stale locks files. It's part of the tests life-cycle to properly
* set-up and tear-down the environment.
*/
public class FileBasedPrimitiveManager implements DistributedPrimitiveManager {

   private final File locksFolder;
   // cache of live locks by id; an entry evicts itself on close via the locks::remove callback
   private final Map<String, FileDistributedLock> locks;
   private boolean started;

   /**
    * Configuration-map constructor (usable via {@code DistributedPrimitiveManager.newInstanceOf}):
    * expects the folder path under the {@code locks-folder} key.
    */
   public FileBasedPrimitiveManager(Map<String, String> args) {
      this(new File(args.get("locks-folder")));
   }

   /**
    * @param locksFolder an existing directory that will hold one lock file per primitive id
    */
   public FileBasedPrimitiveManager(File locksFolder) {
      Objects.requireNonNull(locksFolder);
      if (!locksFolder.exists()) {
         throw new IllegalStateException(locksFolder + " is supposed to already exists");
      }
      if (!locksFolder.isDirectory()) {
         throw new IllegalStateException(locksFolder + " is supposed to be a directory");
      }
      this.locksFolder = locksFolder;
      this.locks = new HashMap<>();
   }

   @Override
   public boolean isStarted() {
      return started;
   }

   @Override
   public void addUnavailableManagerListener(UnavailableManagerListener listener) {
      // noop - this implementation never emits unavailability events
   }

   @Override
   public void removeUnavailableManagerListener(UnavailableManagerListener listener) {
      // noop
   }

   /**
    * Always succeeds immediately: there is no remote service to connect to.
    * {@code unit} is only null-checked when a non-negative timeout is supplied.
    */
   @Override
   public boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException {
      if (timeout >= 0) {
         Objects.requireNonNull(unit);
      }
      if (started) {
         return true;
      }
      started = true;
      return true;
   }

   @Override
   public void start() throws InterruptedException, ExecutionException {
      // negative timeout bypasses the unit null-check above
      start(-1, null);
   }

   /**
    * Closes every cached lock (without firing their eviction callbacks) and clears the cache.
    */
   @Override
   public void stop() {
      if (!started) {
         return;
      }
      try {
         locks.forEach((lockId, lock) -> {
            try {
               // false: do not call back into locks::remove while iterating the map
               lock.close(false);
            } catch (Throwable t) {
               // TODO no op for now: log would be better!
            }
         });
         locks.clear();
      } finally {
         started = false;
      }
   }

   /**
    * Returns the cached lock for {@code lockId} if still open, otherwise creates and
    * caches a fresh one backed by a file of that name inside {@code locksFolder}.
    *
    * @throws ExecutionException wrapping any {@code IOException} raised while opening the lock file
    */
   @Override
   public DistributedLock getDistributedLock(String lockId) throws ExecutionException {
      Objects.requireNonNull(lockId);
      if (!started) {
         throw new IllegalStateException("manager should be started first");
      }
      final FileDistributedLock lock = locks.get(lockId);
      if (lock != null && !lock.isClosed()) {
         return lock;
      }
      try {
         final FileDistributedLock newLock = new FileDistributedLock(locks::remove, locksFolder, lockId);
         locks.put(lockId, newLock);
         return newLock;
      } catch (IOException ioEx) {
         throw new ExecutionException(ioEx);
      }
   }

   /**
    * Implements {@code MutableLong} on top of a lock file whose id is prefixed with
    * {@code "ML:"}: the 8-byte value is stored in the lock file itself.
    */
   @Override
   public MutableLong getMutableLong(final String mutableLongId) throws ExecutionException {
      // use a lock file - but with a prefix
      final FileDistributedLock fileDistributedLock = (FileDistributedLock) getDistributedLock("ML:" + mutableLongId);
      return new MutableLong() {
         @Override
         public String getMutableLongId() {
            return mutableLongId;
         }

         @Override
         public long get() throws UnavailableStateException {
            try {
               return readLong(fileDistributedLock);
            } catch (IOException e) {
               throw new UnavailableStateException(e);
            }
         }

         @Override
         public void set(long value) throws UnavailableStateException {
            try {
               writeLong(fileDistributedLock, value);
            } catch (IOException e) {
               throw new UnavailableStateException(e);
            }
         }

         @Override
         public void close() {
            fileDistributedLock.close();
         }
      };
   }

   // stores the value big-endian at offset 0; forces to disk only after a complete 8-byte write
   private void writeLong(FileDistributedLock fileDistributedLock, long value) throws IOException {
      ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
      buffer.putLong(value);
      buffer.flip();
      if (fileDistributedLock.getChannel().position(0).write(buffer) == Long.BYTES) {
         fileDistributedLock.getChannel().force(false);
      }
   }

   // reads the big-endian value at offset 0; returns 0 when the file holds fewer than 8 bytes
   private long readLong(FileDistributedLock fileDistributedLock) throws IOException {
      ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
      if (fileDistributedLock.getChannel().position(0).read(buffer, 0) != Long.BYTES) {
         return 0;
      }
      buffer.flip();
      return buffer.getLong();
   }
}

View File

@ -0,0 +1,145 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.file;
import java.io.File;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.nio.channels.OverlappingFileLockException;
import java.nio.file.StandardOpenOption;
import java.util.function.Consumer;
import org.apache.activemq.artemis.quorum.DistributedLock;
/**
 * {@code DistributedLock} backed by an exclusive OS file lock on a per-id file.
 */
final class FileDistributedLock implements DistributedLock {

   private final String lockId;
   // invoked with lockId when this lock is closed via close(), letting the owning
   // manager evict it from its cache
   private final Consumer<String> onClosedLock;
   private boolean closed;
   // non-null only while the OS lock is held through this instance
   private FileLock fileLock;
   private final FileChannel channel;

   FileDistributedLock(Consumer<String> onClosedLock, File locksFolder, String lockId) throws IOException {
      this.onClosedLock = onClosedLock;
      this.lockId = lockId;
      this.closed = false;
      this.fileLock = null;
      // the backing file is created on demand; the channel stays open until close()
      this.channel = FileChannel.open(new File(locksFolder, lockId).toPath(), StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE);
   }

   private void checkNotClosed() {
      if (closed) {
         throw new IllegalStateException("This lock is closed");
      }
   }

   @Override
   public String getLockId() {
      checkNotClosed();
      return lockId;
   }

   /**
    * Held means a file lock was acquired through this instance and is still valid.
    */
   @Override
   public boolean isHeldByCaller() {
      checkNotClosed();
      final FileLock fileLock = this.fileLock;
      if (fileLock == null) {
         return false;
      }
      return fileLock.isValid();
   }

   /**
    * Non-blocking acquisition of the OS file lock; must not be called while already holding it.
    */
   @Override
   public boolean tryLock() {
      checkNotClosed();
      final FileLock fileLock = this.fileLock;
      if (fileLock != null) {
         throw new IllegalStateException("unlock first");
      }
      final FileLock lock;
      try {
         lock = channel.tryLock();
      } catch (OverlappingFileLockException o) {
         // this process already hold this lock, but not this manager
         return false;
      } catch (Throwable t) {
         throw new IllegalStateException(t);
      }
      if (lock == null) {
         // tryLock() contract: null means another program holds the lock
         return false;
      }
      this.fileLock = lock;
      return true;
   }

   @Override
   public void unlock() {
      checkNotClosed();
      final FileLock fileLock = this.fileLock;
      if (fileLock != null) {
         this.fileLock = null;
         try {
            fileLock.close();
         } catch (IOException e) {
            // noop
         }
      }
   }

   @Override
   public void addListener(UnavailableLockListener listener) {
      checkNotClosed();
      // noop - this implementation never emits unavailability events
   }

   @Override
   public void removeListener(UnavailableLockListener listener) {
      checkNotClosed();
      // noop
   }

   public boolean isClosed() {
      return closed;
   }

   /**
    * Releases the lock and closes the channel; {@code useCallback} controls whether the
    * owning manager is notified ({@code false} while the manager iterates its own cache).
    */
   public void close(boolean useCallback) {
      if (closed) {
         return;
      }
      try {
         if (useCallback) {
            onClosedLock.accept(lockId);
         }
         unlock();
         channel.close();
      } catch (IOException e) {
         // ignore it
      } finally {
         closed = true;
      }
   }

   @Override
   public void close() {
      close(true);
   }

   // exposed so the manager can read/write the mutable-long payload through the same file
   public FileChannel getChannel() {
      return channel;
   }
}

View File

@ -0,0 +1,171 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.zookeeper;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.UUID;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.TimeUnit;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager.PrimitiveId;
import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreV2;
import org.apache.curator.framework.recipes.locks.Lease;
final class CuratorDistributedLock extends CuratorDistributedPrimitive implements DistributedLock {
private final InterProcessSemaphoreV2 ipcSem;
private final CopyOnWriteArrayList<UnavailableLockListener> listeners;
private Lease lease;
private byte[] leaseVersion;
CuratorDistributedLock(PrimitiveId id, CuratorDistributedPrimitiveManager manager, InterProcessSemaphoreV2 ipcSem) {
super(id, manager);
this.ipcSem = ipcSem;
this.listeners = new CopyOnWriteArrayList<>();
this.leaseVersion = null;
}
@Override
protected void handleReconnected() {
super.handleReconnected();
if (leaseVersion != null) {
assert lease != null;
try {
if (Arrays.equals(lease.getData(), leaseVersion)) {
return;
}
onLost();
} catch (Exception e) {
onLost();
}
}
}
@Override
protected void handleLost() {
super.handleLost();
lease = null;
leaseVersion = null;
for (UnavailableLockListener listener : listeners) {
listener.onUnavailableLockEvent();
}
}
@Override
public String getLockId() {
return getId().id;
}
@Override
public boolean isHeldByCaller() throws UnavailableStateException {
return run(() -> {
checkUnavailable();
if (lease == null) {
return false;
}
assert leaseVersion != null;
try {
return Arrays.equals(lease.getData(), leaseVersion);
} catch (Throwable t) {
throw new UnavailableStateException(t);
}
});
}
@Override
public boolean tryLock() throws UnavailableStateException, InterruptedException {
return tryRun(() -> {
if (lease != null) {
throw new IllegalStateException("unlock first");
}
checkUnavailable();
try {
final byte[] leaseVersion = UUID.randomUUID().toString().getBytes(StandardCharsets.UTF_8);
ipcSem.setNodeData(leaseVersion);
lease = ipcSem.acquire(0, TimeUnit.NANOSECONDS);
if (lease == null) {
ipcSem.setNodeData(null);
return false;
}
this.leaseVersion = leaseVersion;
assert Arrays.equals(lease.getData(), leaseVersion);
return true;
} catch (InterruptedException ie) {
throw ie;
} catch (Throwable e) {
throw new UnavailableStateException(e);
}
});
}
@Override
public void unlock() throws UnavailableStateException {
run(() -> {
checkUnavailable();
final Lease lease = this.lease;
if (lease != null) {
this.lease = null;
this.leaseVersion = null;
try {
ipcSem.returnLease(lease);
} catch (Throwable e) {
throw new UnavailableStateException(e);
}
}
return null;
});
}
@Override
public void addListener(UnavailableLockListener listener) {
run(() -> {
listeners.add(listener);
fireUnavailableListener(listener::onUnavailableLockEvent);
return null;
});
}
@Override
public void removeListener(UnavailableLockListener listener) {
   // run(...) rejects use after close and re-entrant calls from event handling.
   run(() -> {
      return listeners.remove(listener);
   });
}
@Override
protected void handleClosed() {
   // Invoked while holding the manager monitor (see CuratorDistributedPrimitive.close).
   super.handleClosed();
   listeners.clear();
   final Lease lease = this.lease;
   if (lease == null) {
      return;
   }
   this.lease = null;
   // NOTE(review): when unavailable the lease is not returned — presumably the
   // backing session is gone and the return would fail anyway; confirm.
   if (isUnavailable()) {
      return;
   }
   try {
      ipcSem.returnLease(lease);
   } catch (Throwable t) {
      // Best-effort release on close: failures are intentionally swallowed.
      // TODO silent, but debug ;)
   }
}
}

View File

@ -0,0 +1,172 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.zookeeper;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager.PrimitiveId;
import static org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager.PrimitiveType.validatePrimitiveInstance;
/**
 * Base class of the Curator-backed distributed primitives (locks and mutable longs).
 * <p>
 * Every public operation and every connection-event callback is serialized by
 * synchronizing on the owning {@code CuratorDistributedPrimitiveManager}: using a
 * single shared monitor for manager and primitives avoids lock-ordering deadlocks
 * between primitive close and manager event handling.
 * The {@code handleXXX} methods are template hooks for subclasses; they are always
 * invoked while holding the manager monitor and only while this primitive is
 * neither closed nor already marked unavailable.
 */
public abstract class CuratorDistributedPrimitive implements AutoCloseable {

   // this is used to prevent deadlocks on close
   private final CuratorDistributedPrimitiveManager manager;
   private final PrimitiveId id;
   // guarded by synchronized (manager)
   private boolean unavailable;
   // guarded by synchronized (manager)
   private boolean closed;

   protected CuratorDistributedPrimitive(PrimitiveId id, CuratorDistributedPrimitiveManager manager) {
      this.id = id;
      this.manager = manager;
      this.closed = false;
      this.unavailable = false;
      // fail fast if the concrete class doesn't match id.type
      validatePrimitiveInstance(this);
   }

   final PrimitiveId getId() {
      return id;
   }

   // called by the manager on Curator RECONNECTED events
   final void onReconnected() {
      synchronized (manager) {
         if (closed || unavailable) {
            return;
         }
         handleReconnected();
      }
   }

   protected void handleReconnected() {
   }

   // called by the manager on Curator LOST events: latches this primitive as unavailable
   final void onLost() {
      synchronized (manager) {
         if (closed || unavailable) {
            return;
         }
         unavailable = true;
         handleLost();
      }
   }

   protected void handleLost() {
   }

   // called by the manager on Curator SUSPENDED events
   final void onSuspended() {
      synchronized (manager) {
         if (closed || unavailable) {
            return;
         }
         handleSuspended();
      }
   }

   protected void handleSuspended() {
   }

   // called by the manager on stop(): closes without removing itself from the manager's map
   final void onRemoved() {
      close(false);
   }

   private void checkNotClosed() {
      if (closed) {
         throw new IllegalStateException("This lock is closed");
      }
   }

   // action that may throw a single checked exception type
   @FunctionalInterface
   protected interface PrimitiveAction<R, T extends Throwable> {

      R call() throws T;
   }

   // like PrimitiveAction, but additionally interruptible
   @FunctionalInterface
   protected interface InterruptablePrimitiveAction<R, T extends Throwable> {

      R call() throws InterruptedException, T;
   }

   /** Throws {@link UnavailableStateException} if this primitive has been marked unavailable. */
   protected final void checkUnavailable() throws UnavailableStateException {
      if (unavailable) {
         throw new UnavailableStateException(id.type + " with id = " + id.id + " isn't available");
      }
   }

   /**
    * Runs {@code task} only if this primitive is unavailable, flagging the manager as
    * "handling events" so the listener cannot re-enter manager/primitive mutating methods.
    */
   protected final void fireUnavailableListener(Runnable task) {
      run(() -> {
         if (!unavailable) {
            return false;
         }
         manager.startHandlingEvents();
         try {
            task.run();
         } finally {
            manager.completeHandlingEvents();
         }
         return true;
      });
   }

   // executes action under the manager monitor, rejecting use after close or from event listeners
   protected final <R, T extends Throwable> R run(PrimitiveAction<R, T> action) throws T {
      synchronized (manager) {
         manager.checkHandlingEvents();
         checkNotClosed();
         return action.call();
      }
   }

   // same as run, but for interruptible actions
   protected final <R, T extends Throwable> R tryRun(InterruptablePrimitiveAction<R, T> action) throws InterruptedException, T {
      synchronized (manager) {
         manager.checkHandlingEvents();
         checkNotClosed();
         return action.call();
      }
   }

   // idempotent; remove == false is used when the manager itself is dropping the primitive
   private void close(boolean remove) {
      synchronized (manager) {
         manager.checkHandlingEvents();
         if (closed) {
            return;
         }
         closed = true;
         if (remove) {
            manager.remove(this);
         }
         handleClosed();
      }
   }

   protected void handleClosed() {
   }

   protected final boolean isUnavailable() {
      synchronized (manager) {
         return unavailable;
      }
   }

   @Override
   public final void close() {
      close(true);
   }
}

View File

@ -0,0 +1,367 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.zookeeper;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.MutableLong;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.recipes.atomic.DistributedAtomicLong;
import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreV2;
import org.apache.curator.framework.state.ConnectionState;
import org.apache.curator.framework.state.ConnectionStateListener;
import org.apache.curator.retry.RetryForever;
import org.apache.curator.retry.RetryNTimes;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.joining;
/**
 * ZooKeeper-backed {@link DistributedPrimitiveManager} built on Apache Curator.
 * <p>
 * Lifecycle invariant: {@code client != null} and {@code listeners != null} exactly
 * while started. All primitives created by this manager synchronize on this instance,
 * so this manager's monitor is the single lock-ordering point.
 * The {@code handlingEvents} flag guards against listeners re-entering manager or
 * primitive mutating methods from within event callbacks.
 */
public class CuratorDistributedPrimitiveManager implements DistributedPrimitiveManager, ConnectionStateListener {

   enum PrimitiveType {
      lock, mutableLong;

      // defensive check: the instance class must match its declared PrimitiveId.type
      static <T extends CuratorDistributedPrimitive> T validatePrimitiveInstance(T primitive) {
         if (primitive == null) {
            return null;
         }
         boolean valid = false;
         switch (primitive.getId().type) {
            case lock:
               valid = primitive instanceof CuratorDistributedLock;
               break;
            case mutableLong:
               valid = primitive instanceof CuratorMutableLong;
               break;
         }
         if (!valid) {
            throw new AssertionError("Implementation error: " + primitive.getClass() + " is wrongly considered " + primitive.getId().type);
         }
         return primitive;
      }
   }

   // key of the primitives map: the same string id may exist with different types
   static final class PrimitiveId {

      final String id;
      final PrimitiveType type;

      private PrimitiveId(String id, PrimitiveType type) {
         this.id = requireNonNull(id);
         this.type = requireNonNull(type);
      }

      static PrimitiveId of(String id, PrimitiveType type) {
         return new PrimitiveId(id, type);
      }

      @Override
      public boolean equals(Object o) {
         if (this == o)
            return true;
         if (o == null || getClass() != o.getClass())
            return false;
         PrimitiveId that = (PrimitiveId) o;
         if (!Objects.equals(id, that.id))
            return false;
         return type == that.type;
      }

      @Override
      public int hashCode() {
         int result = id != null ? id.hashCode() : 0;
         result = 31 * result + (type != null ? type.hashCode() : 0);
         return result;
      }
   }

   private static final String CONNECT_STRING_PARAM = "connect-string";
   private static final String NAMESPACE_PARAM = "namespace";
   private static final String SESSION_MS_PARAM = "session-ms";
   private static final String SESSION_PERCENT_PARAM = "session-percent";
   private static final String CONNECTION_MS_PARAM = "connection-ms";
   private static final String RETRIES_PARAM = "retries";
   private static final String RETRIES_MS_PARAM = "retries-ms";
   private static final Set<String> VALID_PARAMS = Stream.of(
      CONNECT_STRING_PARAM,
      NAMESPACE_PARAM,
      SESSION_MS_PARAM,
      SESSION_PERCENT_PARAM,
      CONNECTION_MS_PARAM,
      RETRIES_PARAM,
      RETRIES_MS_PARAM).collect(Collectors.toSet());
   private static final String VALID_PARAMS_ON_ERROR = VALID_PARAMS.stream().collect(joining(","));
   // It's 9 times the default ZK tick time ie 2000 ms
   private static final String DEFAULT_SESSION_TIMEOUT_MS = Integer.toString(18_000);
   private static final String DEFAULT_CONNECTION_TIMEOUT_MS = Integer.toString(8_000);
   private static final String DEFAULT_RETRIES = Integer.toString(1);
   private static final String DEFAULT_RETRIES_MS = Integer.toString(1000);
   // why 1/3 of the session? https://cwiki.apache.org/confluence/display/CURATOR/TN14
   private static final String DEFAULT_SESSION_PERCENT = Integer.toString(33);

   // rejects any configuration key outside VALID_PARAMS
   private static Map<String, String> validateParameters(Map<String, String> config) {
      config.forEach((parameterName, ignore) -> validateParameter(parameterName));
      return config;
   }

   private static void validateParameter(String parameterName) {
      if (!VALID_PARAMS.contains(parameterName)) {
         throw new IllegalArgumentException("non existent parameter " + parameterName + ": accepted list is " + VALID_PARAMS_ON_ERROR);
      }
   }

   // non-null only while started
   private CuratorFramework client;
   private final Map<PrimitiveId, CuratorDistributedPrimitive> primitives;
   // non-null only while started; CopyOnWrite so event delivery tolerates concurrent add/remove
   private CopyOnWriteArrayList<UnavailableManagerListener> listeners;
   private boolean unavailable;
   // true only while delivering events to listeners (re-entrancy guard)
   private boolean handlingEvents;
   private final CuratorFrameworkFactory.Builder curatorBuilder;

   public CuratorDistributedPrimitiveManager(Map<String, String> config) {
      this(validateParameters(config), true);
   }

   // the boolean parameter only disambiguates this overload; its value is unused
   private CuratorDistributedPrimitiveManager(Map<String, String> config, boolean ignore) {
      this(config.get(CONNECT_STRING_PARAM),
           config.get(NAMESPACE_PARAM),
           Integer.parseInt(config.getOrDefault(SESSION_MS_PARAM, DEFAULT_SESSION_TIMEOUT_MS)),
           Integer.parseInt(config.getOrDefault(SESSION_PERCENT_PARAM, DEFAULT_SESSION_PERCENT)),
           Integer.parseInt(config.getOrDefault(CONNECTION_MS_PARAM, DEFAULT_CONNECTION_TIMEOUT_MS)),
           Integer.parseInt(config.getOrDefault(RETRIES_PARAM, DEFAULT_RETRIES)),
           Integer.parseInt(config.getOrDefault(RETRIES_MS_PARAM, DEFAULT_RETRIES_MS)));
   }

   private CuratorDistributedPrimitiveManager(String connectString,
                                              String namespace,
                                              int sessionMs,
                                              int sessionPercent,
                                              int connectionMs,
                                              int retries,
                                              int retriesMs) {
      // the Curator client itself is built lazily on start()
      curatorBuilder = CuratorFrameworkFactory.builder()
         .connectString(connectString)
         .namespace(namespace)
         .sessionTimeoutMs(sessionMs)
         .connectionTimeoutMs(connectionMs)
         .retryPolicy(retries >= 0 ? new RetryNTimes(retries, retriesMs) : new RetryForever(retriesMs))
         .simulatedSessionExpirationPercent(sessionPercent);
      this.primitives = new HashMap<>();
      this.listeners = null;
      this.unavailable = false;
      this.handlingEvents = false;
   }

   @Override
   public synchronized boolean isStarted() {
      checkHandlingEvents();
      return client != null;
   }

   @Override
   public synchronized void addUnavailableManagerListener(UnavailableManagerListener listener) {
      checkHandlingEvents();
      // listeners == null means the manager isn't started: silently ignore
      if (listeners == null) {
         return;
      }
      listeners.add(listener);
      // a late-registering listener still gets notified of an already-unavailable manager
      if (unavailable) {
         startHandlingEvents();
         try {
            listener.onUnavailableManagerEvent();
         } finally {
            completeHandlingEvents();
         }
      }
   }

   @Override
   public synchronized void removeUnavailableManagerListener(UnavailableManagerListener listener) {
      checkHandlingEvents();
      if (listeners == null) {
         return;
      }
      listeners.remove(listener);
   }

   /**
    * Starts the Curator client and blocks until connected or the timeout elapses.
    *
    * @return {@code true} if started (or already started), {@code false} on connect timeout
    */
   @Override
   public synchronized boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException {
      checkHandlingEvents();
      if (timeout >= 0) {
         // blockUntilConnected takes an int timeout
         if (timeout > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("curator manager won't support too long timeout ie >" + Integer.MAX_VALUE);
         }
         requireNonNull(unit);
      }
      // idempotent: already started
      if (client != null) {
         return true;
      }
      final CuratorFramework client = curatorBuilder.build();
      try {
         client.start();
         if (!client.blockUntilConnected((int) timeout, unit)) {
            client.close();
            return false;
         }
         this.client = client;
         this.listeners = new CopyOnWriteArrayList<>();
         client.getConnectionStateListenable().addListener(this);
         return true;
      } catch (InterruptedException e) {
         // don't leak a half-started client on interruption
         client.close();
         throw e;
      }
   }

   @Override
   public synchronized void start() throws InterruptedException, ExecutionException {
      // negative timeout: wait without limit
      start(-1, null);
   }

   @Override
   public synchronized void stop() {
      checkHandlingEvents();
      final CuratorFramework client = this.client;
      // idempotent: already stopped
      if (client == null) {
         return;
      }
      this.client = null;
      unavailable = false;
      listeners.clear();
      this.listeners = null;
      client.getConnectionStateListenable().removeListener(this);
      // close every primitive without letting it mutate the map we're iterating
      primitives.forEach((id, primitive) -> {
         try {
            primitive.onRemoved();
         } catch (Throwable t) {
            // TODO log?
         }
      });
      primitives.clear();
      client.close();
   }

   // returns the cached primitive for id, or creates (and caches) one via primitiveFactory
   private synchronized <T extends CuratorDistributedPrimitive> T getPrimitive(PrimitiveId id,
                                                                               Function<PrimitiveId, ? extends T> primitiveFactory) {
      checkHandlingEvents();
      requireNonNull(id);
      if (client == null) {
         throw new IllegalStateException("manager isn't started yet!");
      }
      final CuratorDistributedPrimitive primitive = PrimitiveType.validatePrimitiveInstance(primitives.get(id));
      if (primitive != null) {
         // unchecked but validated: validatePrimitiveInstance checked id.type against the class
         return (T) primitive;
      }
      final T newPrimitive = PrimitiveType.validatePrimitiveInstance(primitiveFactory.apply(id));
      primitives.put(id, newPrimitive);
      // a primitive created while the manager is unavailable starts out lost
      if (unavailable) {
         startHandlingEvents();
         try {
            newPrimitive.onLost();
         } finally {
            completeHandlingEvents();
         }
      }
      return newPrimitive;
   }

   @Override
   public DistributedLock getDistributedLock(String lockId) {
      return getPrimitive(PrimitiveId.of(lockId, PrimitiveType.lock),
                          id -> new CuratorDistributedLock(id, this,
                                                           new InterProcessSemaphoreV2(client, "/" + id.id + "/locks", 1)));
   }

   @Override
   public MutableLong getMutableLong(String mutableLongId) {
      // mutableLongId == id.id here: lock and long primitives use distinct ZK sub-paths
      return getPrimitive(PrimitiveId.of(mutableLongId, PrimitiveType.mutableLong),
                          id -> new CuratorMutableLong(id, this,
                                                       new DistributedAtomicLong(client, "/" + mutableLongId + "/activation-sequence", new RetryNTimes(0, 0))));
   }

   protected void startHandlingEvents() {
      handlingEvents = true;
   }

   protected void completeHandlingEvents() {
      handlingEvents = false;
   }

   // throws if called back into from within event handling; a no-op while stopped
   protected void checkHandlingEvents() {
      if (client == null) {
         return;
      }
      if (handlingEvents) {
         throw new IllegalStateException("UnavailableManagerListener isn't supposed to modify the manager or its primitives on event handling!");
      }
   }

   /**
    * Curator {@link ConnectionStateListener} callback: fans connection transitions
    * out to listeners and primitives. Once unavailable, further events are ignored.
    */
   @Override
   public synchronized void stateChanged(CuratorFramework client, ConnectionState newState) {
      // stale callback from a previous client instance
      if (this.client != client) {
         return;
      }
      if (unavailable) {
         return;
      }
      startHandlingEvents();
      try {
         switch (newState) {
            case LOST:
               unavailable = true;
               listeners.forEach(listener -> listener.onUnavailableManagerEvent());
               primitives.forEach((id, primitive) -> primitive.onLost());
               break;
            case RECONNECTED:
               primitives.forEach((id, primitive) -> primitive.onReconnected());
               break;
            case SUSPENDED:
               primitives.forEach((id, primitive) -> primitive.onSuspended());
               break;
         }
      } finally {
         completeHandlingEvents();
      }
   }

   /**
    * Used for testing purposes
    */
   public synchronized CuratorFramework getCurator() {
      checkHandlingEvents();
      return client;
   }

   // called by a primitive closing itself (CuratorDistributedPrimitive.close(true))
   public synchronized void remove(CuratorDistributedPrimitive primitive) {
      checkHandlingEvents();
      primitives.remove(primitive.getId());
   }
}

View File

@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.zookeeper;
import org.apache.activemq.artemis.quorum.MutableLong;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager.PrimitiveId;
import org.apache.curator.framework.recipes.atomic.AtomicValue;
import org.apache.curator.framework.recipes.atomic.DistributedAtomicLong;
/**
 * Curator-backed {@link MutableLong} implemented on top of a
 * {@code DistributedAtomicLong} ZooKeeper recipe. All operations run under the
 * manager monitor via {@code run(...)} and fail with
 * {@link UnavailableStateException} once the primitive is unavailable.
 */
final class CuratorMutableLong extends CuratorDistributedPrimitive implements MutableLong {

   private final DistributedAtomicLong atomicLong;

   CuratorMutableLong(PrimitiveId id, CuratorDistributedPrimitiveManager manager, DistributedAtomicLong atomicLong) {
      super(id, manager);
      this.atomicLong = atomicLong;
   }

   @Override
   public String getMutableLongId() {
      return getId().id;
   }

   /**
    * Reads the current value.
    *
    * @throws UnavailableStateException if unavailable, if the read didn't succeed
    *                                   or if the underlying recipe failed
    */
   @Override
   public long get() throws UnavailableStateException {
      return run(() -> {
         checkUnavailable();
         final AtomicValue<Long> atomicValue;
         try {
            atomicValue = atomicLong.get();
         } catch (Throwable e) {
            throw new UnavailableStateException(e);
         }
         // Checked outside the try block: previously the deliberately-thrown
         // "cannot query long" exception was caught by the catch above and
         // re-wrapped in a second UnavailableStateException, hiding its message.
         if (!atomicValue.succeeded()) {
            throw new UnavailableStateException("cannot query long " + getId());
         }
         return atomicValue.postValue();
      });
   }

   /**
    * Unconditionally overwrites the value.
    *
    * @throws UnavailableStateException if unavailable or the underlying recipe failed
    */
   @Override
   public void set(long value) throws UnavailableStateException {
      run(() -> {
         checkUnavailable();
         try {
            atomicLong.forceSet(value);
            return null;
         } catch (Throwable e) {
            throw new UnavailableStateException(e);
         }
      });
   }
}

View File

@ -0,0 +1,298 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.Consumer;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
public abstract class DistributedLockTest {
private final ArrayList<AutoCloseable> closeables = new ArrayList<>();
@Before
public void setupEnv() throws Throwable {
}
protected abstract void configureManager(Map<String, String> config);
protected abstract String managerClassName();
@After
public void tearDownEnv() throws Throwable {
closeables.forEach(closeables -> {
try {
closeables.close();
} catch (Throwable t) {
// silent here
}
});
}
protected DistributedPrimitiveManager createManagedDistributeManager() {
return createManagedDistributeManager(stringStringMap -> {
});
}
protected DistributedPrimitiveManager createManagedDistributeManager(Consumer<? super Map<String, String>> defaultConfiguration) {
try {
final HashMap<String, String> config = new HashMap<>();
configureManager(config);
defaultConfiguration.accept(config);
final DistributedPrimitiveManager manager = DistributedPrimitiveManager.newInstanceOf(managerClassName(), config);
closeables.add(manager);
return manager;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
@Test
public void managerReturnsSameLockIfNotClosed() throws ExecutionException, InterruptedException, TimeoutException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
Assert.assertSame(manager.getDistributedLock("a"), manager.getDistributedLock("a"));
}
@Test
public void managerReturnsDifferentLocksIfClosed() throws ExecutionException, InterruptedException, TimeoutException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock closedLock = manager.getDistributedLock("a");
closedLock.close();
Assert.assertNotSame(closedLock, manager.getDistributedLock("a"));
}
@Test
public void managerReturnsDifferentLocksOnRestart() throws ExecutionException, InterruptedException, TimeoutException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock closedLock = manager.getDistributedLock("a");
manager.stop();
manager.start();
Assert.assertNotSame(closedLock, manager.getDistributedLock("a"));
}
@Test(expected = IllegalStateException.class)
public void managerCannotGetLockIfNotStarted() throws ExecutionException, InterruptedException, TimeoutException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.getDistributedLock("a");
}
@Test(expected = NullPointerException.class)
public void managerCannotGetLockWithNullLockId() throws ExecutionException, InterruptedException, TimeoutException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
manager.getDistributedLock(null);
}
@Test
public void closingLockUnlockIt() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock closedLock = manager.getDistributedLock("a");
Assert.assertTrue(closedLock.tryLock());
closedLock.close();
Assert.assertTrue(manager.getDistributedLock("a").tryLock());
}
@Test
public void managerStopUnlockLocks() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
Assert.assertTrue(manager.getDistributedLock("a").tryLock());
Assert.assertTrue(manager.getDistributedLock("b").tryLock());
manager.stop();
manager.start();
Assert.assertFalse(manager.getDistributedLock("a").isHeldByCaller());
Assert.assertFalse(manager.getDistributedLock("b").isHeldByCaller());
}
@Test
public void acquireAndReleaseLock() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock lock = manager.getDistributedLock("a");
Assert.assertFalse(lock.isHeldByCaller());
Assert.assertTrue(lock.tryLock());
Assert.assertTrue(lock.isHeldByCaller());
lock.unlock();
Assert.assertFalse(lock.isHeldByCaller());
}
@Test(expected = IllegalStateException.class)
public void cannotAcquireSameLockTwice() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock lock = manager.getDistributedLock("a");
Assert.assertTrue(lock.tryLock());
lock.tryLock();
}
@Test
public void heldLockIsVisibleByDifferentManagers() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager ownerManager = createManagedDistributeManager();
DistributedPrimitiveManager observerManager = createManagedDistributeManager();
ownerManager.start();
observerManager.start();
Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock());
Assert.assertTrue(ownerManager.getDistributedLock("a").isHeldByCaller());
Assert.assertFalse(observerManager.getDistributedLock("a").isHeldByCaller());
}
@Test
public void unlockedLockIsVisibleByDifferentManagers() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager ownerManager = createManagedDistributeManager();
DistributedPrimitiveManager observerManager = createManagedDistributeManager();
ownerManager.start();
observerManager.start();
Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock());
ownerManager.getDistributedLock("a").unlock();
Assert.assertFalse(observerManager.getDistributedLock("a").isHeldByCaller());
Assert.assertFalse(ownerManager.getDistributedLock("a").isHeldByCaller());
Assert.assertTrue(observerManager.getDistributedLock("a").tryLock());
}
@Test
public void cannotAcquireSameLockFromDifferentManagers() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager ownerManager = createManagedDistributeManager();
DistributedPrimitiveManager notOwnerManager = createManagedDistributeManager();
ownerManager.start();
notOwnerManager.start();
Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock());
Assert.assertFalse(notOwnerManager.getDistributedLock("a").tryLock());
}
@Test
public void cannotUnlockFromNotOwnerManager() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager ownerManager = createManagedDistributeManager();
DistributedPrimitiveManager notOwnerManager = createManagedDistributeManager();
ownerManager.start();
notOwnerManager.start();
Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock());
notOwnerManager.getDistributedLock("a").unlock();
Assert.assertFalse(notOwnerManager.getDistributedLock("a").isHeldByCaller());
Assert.assertTrue(ownerManager.getDistributedLock("a").isHeldByCaller());
}
@Test
public void timedTryLockSucceedWithShortTimeout() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock backgroundLock = manager.getDistributedLock("a");
Assert.assertTrue(backgroundLock.tryLock(1, TimeUnit.NANOSECONDS));
}
@Test
public void timedTryLockFailAfterTimeout() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedPrimitiveManager otherManager = createManagedDistributeManager();
otherManager.start();
Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
final long start = System.nanoTime();
final long timeoutSec = 1;
Assert.assertFalse(manager.getDistributedLock("a").tryLock(timeoutSec, TimeUnit.SECONDS));
final long elapsed = TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - start);
assertThat(elapsed, greaterThanOrEqualTo(timeoutSec));
}
@Test
public void timedTryLockSuccess() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedPrimitiveManager otherManager = createManagedDistributeManager();
otherManager.start();
Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
DistributedLock backgroundLock = manager.getDistributedLock("a");
CompletableFuture<Boolean> acquired = new CompletableFuture<>();
CountDownLatch startedTry = new CountDownLatch(1);
Thread tryLockThread = new Thread(() -> {
startedTry.countDown();
try {
if (!backgroundLock.tryLock(Long.MAX_VALUE, TimeUnit.DAYS)) {
acquired.complete(false);
} else {
acquired.complete(true);
}
} catch (Throwable e) {
acquired.complete(false);
}
});
tryLockThread.start();
Assert.assertTrue(startedTry.await(10, TimeUnit.SECONDS));
otherManager.getDistributedLock("a").unlock();
Assert.assertTrue(acquired.get(4, TimeUnit.SECONDS));
}
@Test
public void interruptStopTimedTryLock() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedPrimitiveManager otherManager = createManagedDistributeManager();
otherManager.start();
Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
DistributedLock backgroundLock = manager.getDistributedLock("a");
CompletableFuture<Boolean> interrupted = new CompletableFuture<>();
CountDownLatch startedTry = new CountDownLatch(1);
Thread tryLockThread = new Thread(() -> {
startedTry.countDown();
try {
backgroundLock.tryLock(Long.MAX_VALUE, TimeUnit.DAYS);
interrupted.complete(false);
} catch (UnavailableStateException e) {
interrupted.complete(false);
} catch (InterruptedException e) {
interrupted.complete(true);
}
});
tryLockThread.start();
Assert.assertTrue(startedTry.await(10, TimeUnit.SECONDS));
// let background lock to perform some tries
TimeUnit.SECONDS.sleep(1);
tryLockThread.interrupt();
Assert.assertTrue(interrupted.get(4, TimeUnit.SECONDS));
}
@Test
public void lockAndMutableLongWithSameIdCanExistsTogether() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
final String id = "a";
Assert.assertTrue(manager.getDistributedLock(id).tryLock());
Assert.assertEquals(0, manager.getMutableLong(id).get());
manager.getMutableLong(id).set(1);
Assert.assertTrue(manager.getDistributedLock(id).isHeldByCaller());
Assert.assertEquals(1, manager.getMutableLong(id).get());
}
}

View File

@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.file;
import java.io.File;
import java.lang.reflect.InvocationTargetException;
import java.util.Collections;
import java.util.Map;
import org.apache.activemq.artemis.quorum.DistributedLockTest;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
/**
 * Runs the {@link DistributedLockTest} compatibility suite against
 * {@code FileBasedPrimitiveManager}, plus reflective-creation checks.
 */
public class FileDistributedLockTest extends DistributedLockTest {

   @Rule
   public TemporaryFolder tmpFolder = new TemporaryFolder();
   // folder holding the lock files, freshly created for each test by setupEnv
   private File locksFolder;

   @Before
   @Override
   public void setupEnv() throws Throwable {
      locksFolder = tmpFolder.newFolder("locks-folder");
      super.setupEnv();
   }

   @Override
   protected void configureManager(Map<String, String> config) {
      config.put("locks-folder", locksFolder.toString());
   }

   @Override
   protected String managerClassName() {
      return FileBasedPrimitiveManager.class.getName();
   }

   @Test
   public void reflectiveManagerCreation() throws Exception {
      DistributedPrimitiveManager.newInstanceOf(managerClassName(), Collections.singletonMap("locks-folder", locksFolder.toString()));
   }

   // newInstanceOf builds the manager reflectively, so constructor failures
   // surface wrapped in InvocationTargetException
   @Test(expected = InvocationTargetException.class)
   public void reflectiveManagerCreationFailWithoutLocksFolder() throws Exception {
      DistributedPrimitiveManager.newInstanceOf(managerClassName(), Collections.emptyMap());
   }

   @Test(expected = InvocationTargetException.class)
   public void reflectiveManagerCreationFailIfLocksFolderIsNotFolder() throws Exception {
      DistributedPrimitiveManager.newInstanceOf(managerClassName(), Collections.singletonMap("locks-folder", tmpFolder.newFile().toString()));
   }
}

View File

@ -0,0 +1,364 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.zookeeper;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import com.google.common.base.Predicates;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.apache.activemq.artemis.utils.Wait;
import org.apache.curator.test.InstanceSpec;
import org.apache.curator.test.TestingCluster;
import org.apache.activemq.artemis.quorum.DistributedLockTest;
import org.apache.curator.test.TestingZooKeeperServer;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import static java.lang.Boolean.TRUE;
import static org.hamcrest.Matchers.greaterThan;
/**
 * ZooKeeper-backed flavour of {@link DistributedLockTest}: boots a real Curator
 * {@link TestingCluster} (3 or 5 nodes, parameterized) and verifies lock
 * acquisition, unavailability-listener notification, and behavior while the
 * ensemble is disconnected, stopped, or has lost quorum.
 */
@RunWith(value = Parameterized.class)
public class CuratorDistributedLockTest extends DistributedLockTest {

   // First ensemble port; node i listens on BASE_SERVER_PORT + i.
   private static final int BASE_SERVER_PORT = 6666;
   private static final int CONNECTION_MS = 2000;
   // Beware: the server tick must be small enough to let the session be correctly expired
   private static final int SESSION_MS = 6000;
   private static final int SERVER_TICK_MS = 2000;
   private static final int RETRIES_MS = 100;
   private static final int RETRIES = 1;

   // Ensemble size, injected by the Parameterized runner (see getTestParameters).
   @Parameterized.Parameter
   public int nodes;
   @Rule
   public TemporaryFolder tmpFolder = new TemporaryFolder();
   private TestingCluster testingServer;
   private InstanceSpec[] clusterSpecs;
   private String connectString;

   // Run the whole suite against both a 3-node and a 5-node ensemble.
   @Parameterized.Parameters(name = "nodes={0}")
   public static Iterable<Object[]> getTestParameters() {
      return Arrays.asList(new Object[][]{{3}, {5}});
   }

   // Starts the testing cluster and waits for a leader before the base setup runs.
   @Override
   public void setupEnv() throws Throwable {
      clusterSpecs = new InstanceSpec[nodes];
      for (int i = 0; i < nodes; i++) {
         clusterSpecs[i] = new InstanceSpec(tmpFolder.newFolder(), BASE_SERVER_PORT + i, -1, -1, true, -1, SERVER_TICK_MS, -1);
      }
      testingServer = new TestingCluster(clusterSpecs);
      testingServer.start();
      // start waits for quorumPeer!=null but not that it has started...
      Wait.waitFor(this::ensembleHasLeader);
      connectString = testingServer.getConnectString();
      super.setupEnv();
   }

   @Override
   public void tearDownEnv() throws Throwable {
      super.tearDownEnv();
      testingServer.close();
   }

   // Supplies the Curator manager configuration pointing at the test ensemble.
   @Override
   protected void configureManager(Map<String, String> config) {
      config.put("connect-string", connectString);
      config.put("session-ms", Integer.toString(SESSION_MS));
      config.put("connection-ms", Integer.toString(CONNECTION_MS));
      config.put("retries", Integer.toString(RETRIES));
      config.put("retries-ms", Integer.toString(RETRIES_MS));
   }

   @Override
   protected String managerClassName() {
      return CuratorDistributedPrimitiveManager.class.getName();
   }

   // Unknown configuration keys must be rejected at construction time.
   @Test(expected = RuntimeException.class)
   public void cannotCreateManagerWithNotValidParameterNames() {
      final DistributedPrimitiveManager manager = createManagedDistributeManager(config -> config.put("_", "_"));
   }

   // Locks with the same id but different namespaces must not contend.
   @Test
   public void canAcquireLocksFromDifferentNamespace() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
      final DistributedPrimitiveManager manager1 = createManagedDistributeManager(config -> config.put("namespace", "1"));
      manager1.start();
      final DistributedPrimitiveManager manager2 = createManagedDistributeManager(config -> config.put("namespace", "2"));
      manager2.start();
      Assert.assertTrue(manager1.getDistributedLock("a").tryLock());
      Assert.assertTrue(manager2.getDistributedLock("a").tryLock());
   }

   // With the whole ensemble down, a timed start must return false rather than hang.
   @Test
   public void cannotStartManagerWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException {
      final DistributedPrimitiveManager manager = createManagedDistributeManager();
      testingServer.close();
      Assert.assertFalse(manager.start(1, TimeUnit.SECONDS));
   }

   // After the ensemble goes away the lock first notifies unavailability, then tryLock throws.
   @Test(expected = UnavailableStateException.class)
   public void cannotAcquireLockWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
      final DistributedPrimitiveManager manager = createManagedDistributeManager();
      manager.start();
      final DistributedLock lock = manager.getDistributedLock("a");
      final CountDownLatch notAvailable = new CountDownLatch(1);
      final DistributedLock.UnavailableLockListener listener = notAvailable::countDown;
      lock.addListener(listener);
      testingServer.close();
      Assert.assertTrue(notAvailable.await(30, TimeUnit.SECONDS));
      lock.tryLock();
   }

   // tryLock must fail fast (throw) when the ensemble has been closed.
   @Test(expected = UnavailableStateException.class)
   public void cannotTryLockWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
      final DistributedPrimitiveManager manager = createManagedDistributeManager();
      manager.start();
      final DistributedLock lock = manager.getDistributedLock("a");
      testingServer.close();
      lock.tryLock();
   }

   // isHeldByCaller must throw rather than answer stale state after disconnection.
   @Test(expected = UnavailableStateException.class)
   public void cannotCheckLockStatusWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
      final DistributedPrimitiveManager manager = createManagedDistributeManager();
      manager.start();
      final DistributedLock lock = manager.getDistributedLock("a");
      Assert.assertFalse(lock.isHeldByCaller());
      Assert.assertTrue(lock.tryLock());
      testingServer.close();
      lock.isHeldByCaller();
   }

   // A held lock becomes unavailable (listener fires) once the ensemble stops.
   @Test(expected = UnavailableStateException.class)
   public void looseLockAfterServerStop() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException, IOException {
      final DistributedPrimitiveManager manager = createManagedDistributeManager();
      manager.start();
      final DistributedLock lock = manager.getDistributedLock("a");
      Assert.assertTrue(lock.tryLock());
      Assert.assertTrue(lock.isHeldByCaller());
      final CountDownLatch notAvailable = new CountDownLatch(1);
      final DistributedLock.UnavailableLockListener listener = notAvailable::countDown;
      lock.addListener(listener);
      Assert.assertEquals(1, notAvailable.getCount());
      testingServer.close();
      Assert.assertTrue(notAvailable.await(30, TimeUnit.SECONDS));
      lock.isHeldByCaller();
   }

   // Once a majority of nodes restarts and the old session expires, a new manager can take the lock.
   @Test
   public void canAcquireLockOnMajorityRestart() throws Exception {
      final DistributedPrimitiveManager manager = createManagedDistributeManager();
      manager.start();
      final DistributedLock lock = manager.getDistributedLock("a");
      Assert.assertTrue(lock.tryLock());
      Assert.assertTrue(lock.isHeldByCaller());
      final CountDownLatch notAvailable = new CountDownLatch(1);
      final DistributedLock.UnavailableLockListener listener = notAvailable::countDown;
      lock.addListener(listener);
      Assert.assertEquals(1, notAvailable.getCount());
      testingServer.stop();
      notAvailable.await();
      manager.stop();
      restartMajorityNodes(true);
      final DistributedPrimitiveManager otherManager = createManagedDistributeManager();
      otherManager.start();
      // await more than the expected value; it depends on how curator session expiration is configured
      TimeUnit.MILLISECONDS.sleep(SESSION_MS + SERVER_TICK_MS);
      Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
   }

   // Without quorum the manager cannot complete a timed start.
   @Test
   public void cannotStartManagerWithoutQuorum() throws Exception {
      Assume.assumeThat(nodes, greaterThan(1));
      DistributedPrimitiveManager manager = createManagedDistributeManager();
      stopMajorityNotLeaderNodes(true);
      Assert.assertFalse(manager.start(2, TimeUnit.SECONDS));
      Assert.assertFalse(manager.isStarted());
   }

   // Without quorum, tryLock must throw.
   @Test(expected = UnavailableStateException.class)
   public void cannotAcquireLockWithoutQuorum() throws Exception {
      Assume.assumeThat(nodes, greaterThan(1));
      DistributedPrimitiveManager manager = createManagedDistributeManager();
      manager.start();
      stopMajorityNotLeaderNodes(true);
      DistributedLock lock = manager.getDistributedLock("a");
      lock.tryLock();
   }

   // Without quorum, isHeldByCaller either throws UnavailableStateException or reports not-held.
   @Test
   public void cannotCheckLockWithoutQuorum() throws Exception {
      Assume.assumeThat(nodes, greaterThan(1));
      DistributedPrimitiveManager manager = createManagedDistributeManager();
      manager.start();
      stopMajorityNotLeaderNodes(true);
      DistributedLock lock = manager.getDistributedLock("a");
      final boolean held;
      try {
         held = lock.isHeldByCaller();
      } catch (UnavailableStateException expected) {
         return;
      }
      Assert.assertFalse(held);
   }

   // Obtaining the lock object itself does not need quorum (no remote call yet).
   @Test
   public void canGetLockWithoutQuorum() throws Exception {
      Assume.assumeThat(nodes, greaterThan(1));
      DistributedPrimitiveManager manager = createManagedDistributeManager();
      manager.start();
      stopMajorityNotLeaderNodes(true);
      DistributedLock lock = manager.getDistributedLock("a");
      Assert.assertNotNull(lock);
   }

   // Quorum loss must trigger the lock's unavailable listener within a session+tick window.
   @Test
   public void notifiedAsUnavailableWhileLoosingQuorum() throws Exception {
      Assume.assumeThat(nodes, greaterThan(1));
      DistributedPrimitiveManager manager = createManagedDistributeManager();
      manager.start();
      DistributedLock lock = manager.getDistributedLock("a");
      CountDownLatch unavailable = new CountDownLatch(1);
      lock.addListener(unavailable::countDown);
      stopMajorityNotLeaderNodes(true);
      Assert.assertTrue(unavailable.await(SESSION_MS + SERVER_TICK_MS, TimeUnit.MILLISECONDS));
   }

   // Each listener (manager-level and lock-level) must fire exactly once per outage.
   @Test
   public void beNotifiedOnce() throws Exception {
      Assume.assumeThat(nodes, greaterThan(1));
      DistributedPrimitiveManager manager = createManagedDistributeManager();
      manager.start();
      DistributedLock lock = manager.getDistributedLock("a");
      final AtomicInteger unavailableManager = new AtomicInteger(0);
      final AtomicInteger unavailableLock = new AtomicInteger(0);
      manager.addUnavailableManagerListener(unavailableManager::incrementAndGet);
      lock.addListener(unavailableLock::incrementAndGet);
      stopMajorityNotLeaderNodes(true);
      TimeUnit.MILLISECONDS.sleep(SESSION_MS + SERVER_TICK_MS + CONNECTION_MS);
      Assert.assertEquals(1, unavailableLock.get());
      Assert.assertEquals(1, unavailableManager.get());
   }

   // A thread blocked in a timed tryLock must be released with UnavailableStateException on quorum loss.
   @Test
   public void beNotifiedOfUnavailabilityWhileBlockedOnTimedLock() throws Exception {
      Assume.assumeThat(nodes, greaterThan(1));
      DistributedPrimitiveManager manager = createManagedDistributeManager();
      manager.start();
      DistributedLock lock = manager.getDistributedLock("a");
      final AtomicInteger unavailableManager = new AtomicInteger(0);
      final AtomicInteger unavailableLock = new AtomicInteger(0);
      manager.addUnavailableManagerListener(unavailableManager::incrementAndGet);
      lock.addListener(unavailableLock::incrementAndGet);
      final DistributedPrimitiveManager otherManager = createManagedDistributeManager();
      otherManager.start();
      Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
      final CountDownLatch startedTimedLock = new CountDownLatch(1);
      final AtomicReference<Boolean> unavailableTimedLock = new AtomicReference<>(null);
      Thread timedLock = new Thread(() -> {
         startedTimedLock.countDown();
         try {
            lock.tryLock(Long.MAX_VALUE, TimeUnit.DAYS);
            unavailableTimedLock.set(false);
         } catch (UnavailableStateException e) {
            unavailableTimedLock.set(true);
         } catch (InterruptedException e) {
            unavailableTimedLock.set(false);
         }
      });
      timedLock.start();
      Assert.assertTrue(startedTimedLock.await(10, TimeUnit.SECONDS));
      TimeUnit.SECONDS.sleep(1);
      stopMajorityNotLeaderNodes(true);
      TimeUnit.MILLISECONDS.sleep(SESSION_MS + CONNECTION_MS);
      Wait.waitFor(() -> unavailableLock.get() > 0, SERVER_TICK_MS);
      Assert.assertEquals(1, unavailableManager.get());
      Assert.assertEquals(TRUE, unavailableTimedLock.get());
   }

   // Listeners registered AFTER the manager has become unavailable must be notified immediately.
   @Test
   public void beNotifiedOfAlreadyUnavailableManagerAfterAddingListener() throws Exception {
      DistributedPrimitiveManager manager = createManagedDistributeManager();
      manager.start();
      final AtomicBoolean unavailable = new AtomicBoolean(false);
      DistributedPrimitiveManager.UnavailableManagerListener managerListener = () -> {
         unavailable.set(true);
      };
      manager.addUnavailableManagerListener(managerListener);
      Assert.assertFalse(unavailable.get());
      stopMajorityNotLeaderNodes(true);
      Wait.waitFor(unavailable::get);
      manager.removeUnavailableManagerListener(managerListener);
      final AtomicInteger unavailableOnRegister = new AtomicInteger();
      manager.addUnavailableManagerListener(unavailableOnRegister::incrementAndGet);
      Assert.assertEquals(1, unavailableOnRegister.get());
      unavailableOnRegister.set(0);
      try (DistributedLock lock = manager.getDistributedLock("a")) {
         lock.addListener(unavailableOnRegister::incrementAndGet);
         Assert.assertEquals(1, unavailableOnRegister.get());
      }
   }

   // True when at least one ensemble member reports itself as leader.
   private boolean ensembleHasLeader() {
      return testingServer.getServers().stream().filter(CuratorDistributedLockTest::isLeader).count() != 0;
   }

   private static boolean isLeader(TestingZooKeeperServer server) {
      long leaderId = server.getQuorumPeer().getLeaderId();
      long id = server.getQuorumPeer().getId();
      return id == leaderId;
   }

   // Stops (nodes/2)+1 follower nodes, which is enough to break quorum while keeping the leader up.
   private void stopMajorityNotLeaderNodes(boolean fromLast) throws Exception {
      List<TestingZooKeeperServer> followers = testingServer.getServers().stream().filter(Predicates.not(CuratorDistributedLockTest::isLeader)).collect(Collectors.toList());
      final int quorum = (nodes / 2) + 1;
      for (int i = 0; i < quorum; i++) {
         final int nodeIndex = fromLast ? (followers.size() - 1) - i : i;
         followers.get(nodeIndex).stop();
      }
   }

   // Restarts enough nodes ((nodes/2)+1) to re-establish quorum.
   private void restartMajorityNodes(boolean startFromLast) throws Exception {
      final int quorum = (nodes / 2) + 1;
      for (int i = 0; i < quorum; i++) {
         final int nodeIndex = startFromLast ? (nodes - 1) - i : i;
         if (!testingServer.restartServer(clusterSpecs[nodeIndex])) {
            throw new IllegalStateException("errored while restarting " + clusterSpecs[nodeIndex]);
         }
      }
   }
}

View File

@ -0,0 +1,140 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.zookeeper;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.test.InstanceSpec;
import org.apache.curator.test.TestingCluster;
import org.apache.curator.utils.ZKPaths;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.data.Stat;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
/**
 * Boots a single-node Curator {@link TestingCluster} and verifies the ZooKeeper
 * node layout created by {@link CuratorDistributedPrimitiveManager} primitives
 * (locks and mutable longs) under the configured namespace.
 */
public class CuratorDistributedPrimitiveManagerTest {

   private static final int BASE_SERVER_PORT = 6666;
   private static final int CONNECTION_MS = 2000;
   // Beware: the server tick must be small enough to let the session expire correctly
   private static final int SESSION_MS = 6000;
   private static final int SERVER_TICK_MS = 2000;
   private static final int RETRIES_MS = 100;
   private static final int RETRIES = 1;

   // Everything created by createManagedDistributeManager is closed in tearDownEnv.
   private final ArrayList<AutoCloseable> autoCloseables = new ArrayList<>();

   // Single-node ensemble is sufficient for layout verification.
   public int nodes = 1;
   @Rule
   public TemporaryFolder tmpFolder = new TemporaryFolder();
   private TestingCluster testingServer;
   private String connectString;

   @Before
   public void setupEnv() throws Throwable {
      final InstanceSpec[] specs = new InstanceSpec[nodes];
      for (int node = 0; node < nodes; node++) {
         specs[node] = new InstanceSpec(tmpFolder.newFolder(), BASE_SERVER_PORT + node, -1, -1, true, -1, SERVER_TICK_MS, -1);
      }
      testingServer = new TestingCluster(specs);
      testingServer.start();
      connectString = testingServer.getConnectString();
   }

   @After
   public void tearDownEnv() throws Throwable {
      for (AutoCloseable resource : autoCloseables) {
         try {
            resource.close();
         } catch (Throwable ignored) {
            // best-effort cleanup: a close failure must not mask the test outcome
         }
      }
      testingServer.close();
   }

   // Base Curator configuration pointing at the test ensemble.
   protected void configureManager(Map<String, String> config) {
      config.put("connect-string", connectString);
      config.put("session-ms", Integer.toString(SESSION_MS));
      config.put("connection-ms", Integer.toString(CONNECTION_MS));
      config.put("retries", Integer.toString(RETRIES));
      config.put("retries-ms", Integer.toString(RETRIES_MS));
   }

   /**
    * Reflectively builds a manager from the base configuration (optionally
    * customized by {@code defaultConfiguration}) and registers it for cleanup.
    */
   protected DistributedPrimitiveManager createManagedDistributeManager(Consumer<? super Map<String, String>> defaultConfiguration) {
      try {
         final Map<String, String> config = new HashMap<>();
         configureManager(config);
         defaultConfiguration.accept(config);
         final DistributedPrimitiveManager manager = DistributedPrimitiveManager.newInstanceOf(managerClassName(), config);
         autoCloseables.add(manager);
         return manager;
      } catch (Exception e) {
         throw new RuntimeException(e);
      }
   }

   protected String managerClassName() {
      return CuratorDistributedPrimitiveManager.class.getName();
   }

   // Acquire a lock and a mutable long, then dump the znode tree and check the layout.
   @Test
   public void verifyLayoutInZK() throws Exception {
      final DistributedPrimitiveManager manager = createManagedDistributeManager(config -> config.put("namespace", "activemq-artemis"));
      manager.start();
      Assert.assertTrue(manager.getDistributedLock("journal-identity-000-111").tryLock());
      Assert.assertTrue(manager.getMutableLong("journal-identity-000-111").compareAndSet(0, 1));
      final CuratorFramework curator = ((CuratorDistributedPrimitiveManager) manager).getCurator();
      final List<String> entries = new LinkedList<>();
      dumpZK(curator.getZookeeperClient().getZooKeeper(), "/", entries);
      Assert.assertTrue(entries.get(2).contains("activation-sequence"));
      for (String entry : entries) {
         System.err.println("ZK: " + entry);
      }
   }

   // Depth-first dump of the znode tree (skipping ZooKeeper's own /zookeeper subtree).
   private void dumpZK(ZooKeeper zooKeeper, String path, List<String> entries) throws InterruptedException, KeeperException {
      for (String child : ZKPaths.getSortedChildren(zooKeeper, path)) {
         if (child.equals("zookeeper")) {
            continue;
         }
         final String qualifiedPath = path.endsWith("/") ? path + child : path + "/" + child;
         final Stat stat = new Stat();
         zooKeeper.getData(qualifiedPath, null, stat);
         entries.add(qualifiedPath + ", data-len:" + stat.getDataLength() + ", ephemeral: " + (stat.getEphemeralOwner() != 0));
         dumpZK(zooKeeper, qualifiedPath, entries);
      }
   }
}

View File

@ -85,6 +85,11 @@
<artifactId>artemis-core-client</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-quorum-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>activemq-artemis-native</artifactId>

View File

@ -22,6 +22,8 @@ import java.util.List;
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
import org.apache.activemq.artemis.api.core.ActiveMQIllegalStateException;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ColocatedPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.LiveOnlyPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
@ -31,6 +33,8 @@ import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfigur
import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.BackupPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ColocatedPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy;
@ -79,6 +83,11 @@ public final class ConfigurationUtils {
ReplicaPolicyConfiguration pc = (ReplicaPolicyConfiguration) conf;
return new ReplicaPolicy(pc.getClusterName(), pc.getMaxSavedReplicatedJournalsSize(), pc.getGroupName(), pc.isRestartBackup(), pc.isAllowFailBack(), pc.getInitialReplicationSyncTimeout(), getScaleDownPolicy(pc.getScaleDownConfiguration()), server.getNetworkHealthCheck(), pc.getVoteOnReplicationFailure(), pc.getQuorumSize(), pc.getVoteRetries(), pc.getVoteRetryWait(), pc.getQuorumVoteWait(), pc.getRetryReplicationWait());
}
case PRIMARY:
return ReplicationPrimaryPolicy.with((ReplicationPrimaryPolicyConfiguration) conf);
case BACKUP: {
return ReplicationBackupPolicy.with((ReplicationBackupPolicyConfiguration) conf);
}
case SHARED_STORE_MASTER: {
SharedStoreMasterPolicyConfiguration pc = (SharedStoreMasterPolicyConfiguration) conf;
return new SharedStoreMasterPolicy(pc.isFailoverOnServerShutdown(), pc.isWaitForActivation());

View File

@ -26,7 +26,9 @@ public interface HAPolicyConfiguration extends Serializable {
REPLICA("Replica"),
SHARED_STORE_MASTER("Shared Store Master"),
SHARED_STORE_SLAVE("Shared Store Slave"),
COLOCATED("Colocated");
COLOCATED("Colocated"),
PRIMARY("Primary"),
BACKUP("Backup");
private String name;

View File

@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.config.ha;
import java.io.Serializable;
import java.util.Map;
/**
 * Serializable value holder describing how to build a distributed-primitive
 * manager by reflection: the implementation class name plus its string
 * configuration properties.
 * <p>
 * NOTE(review): the properties map is stored and exposed by reference, so
 * callers share its mutability — confirm whether a defensive copy is wanted.
 */
public class DistributedPrimitiveManagerConfiguration implements Serializable {

   // Fully qualified name of the manager implementation to instantiate.
   private final String className;
   // Implementation-specific configuration entries (e.g. connect-string).
   private final Map<String, String> properties;

   public DistributedPrimitiveManagerConfiguration(String className, Map<String, String> properties) {
      this.className = className;
      this.properties = properties;
   }

   public String getClassName() {
      return className;
   }

   public Map<String, String> getProperties() {
      return properties;
   }
}

View File

@ -0,0 +1,115 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.config.ha;
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
/**
 * {@link HAPolicyConfiguration} for a broker acting as a replication backup,
 * coordinated through a pluggable distributed-primitive manager.
 * Obtain instances via {@link #withDefault()} and customize with the fluent setters.
 */
public class ReplicationBackupPolicyConfiguration implements HAPolicyConfiguration {

   private String clusterName = null;
   private int maxSavedReplicatedJournalsSize = ActiveMQDefaultConfiguration.getDefaultMaxSavedReplicatedJournalsSize();
   private String groupName = null;
   /*
    * used in the replicated policy after failover
    * */
   private boolean allowFailBack = false;
   private long initialReplicationSyncTimeout = ActiveMQDefaultConfiguration.getDefaultInitialReplicationSyncTimeout();
   private long retryReplicationWait = ActiveMQDefaultConfiguration.getDefaultRetryReplicationWait();
   private DistributedPrimitiveManagerConfiguration distributedManagerConfiguration = null;

   /**
    * Static factory returning a configuration populated with the broker defaults.
    * (The redundant {@code final} modifier was dropped: static methods cannot be
    * overridden, and the sibling ReplicationPrimaryPolicyConfiguration.withDefault()
    * does not carry it.)
    */
   public static ReplicationBackupPolicyConfiguration withDefault() {
      return new ReplicationBackupPolicyConfiguration();
   }

   // Use withDefault() instead of direct construction.
   private ReplicationBackupPolicyConfiguration() {
   }

   @Override
   public HAPolicyConfiguration.TYPE getType() {
      return TYPE.BACKUP;
   }

   public String getClusterName() {
      return clusterName;
   }

   public ReplicationBackupPolicyConfiguration setClusterName(String clusterName) {
      this.clusterName = clusterName;
      return this;
   }

   public int getMaxSavedReplicatedJournalsSize() {
      return maxSavedReplicatedJournalsSize;
   }

   public ReplicationBackupPolicyConfiguration setMaxSavedReplicatedJournalsSize(int maxSavedReplicatedJournalsSize) {
      this.maxSavedReplicatedJournalsSize = maxSavedReplicatedJournalsSize;
      return this;
   }

   public String getGroupName() {
      return groupName;
   }

   public ReplicationBackupPolicyConfiguration setGroupName(String groupName) {
      this.groupName = groupName;
      return this;
   }

   public boolean isAllowFailBack() {
      return allowFailBack;
   }

   public ReplicationBackupPolicyConfiguration setAllowFailBack(boolean allowFailBack) {
      this.allowFailBack = allowFailBack;
      return this;
   }

   public long getInitialReplicationSyncTimeout() {
      return initialReplicationSyncTimeout;
   }

   public ReplicationBackupPolicyConfiguration setInitialReplicationSyncTimeout(long initialReplicationSyncTimeout) {
      this.initialReplicationSyncTimeout = initialReplicationSyncTimeout;
      return this;
   }

   public long getRetryReplicationWait() {
      return retryReplicationWait;
   }

   public ReplicationBackupPolicyConfiguration setRetryReplicationWait(long retryReplicationWait) {
      this.retryReplicationWait = retryReplicationWait;
      return this;
   }

   public ReplicationBackupPolicyConfiguration setDistributedManagerConfiguration(DistributedPrimitiveManagerConfiguration configuration) {
      this.distributedManagerConfiguration = configuration;
      return this;
   }

   public DistributedPrimitiveManagerConfiguration getDistributedManagerConfiguration() {
      return distributedManagerConfiguration;
   }
}

View File

@ -0,0 +1,114 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.config.ha;
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
/**
 * {@link HAPolicyConfiguration} for a broker acting as the replication primary,
 * coordinated through a pluggable distributed-primitive manager.
 * Obtain instances via {@link #withDefault()} and customize with the setters.
 */
public class ReplicationPrimaryPolicyConfiguration implements HAPolicyConfiguration {

   private String groupName = null;
   private String clusterName = null;
   private long initialReplicationSyncTimeout = ActiveMQDefaultConfiguration.getDefaultInitialReplicationSyncTimeout();
   private Long retryReplicationWait = ActiveMQDefaultConfiguration.getDefaultRetryReplicationWait();
   private DistributedPrimitiveManagerConfiguration distributedManagerConfiguration = null;
   // Normalized coordination identity: at most 16 chars, even length, '-' mapped to '.'
   // (the even-length/replace rules make it convertible to a UUID elsewhere).
   private String coordinationId = null;

   /** Static factory returning a configuration populated with the broker defaults. */
   public static ReplicationPrimaryPolicyConfiguration withDefault() {
      return new ReplicationPrimaryPolicyConfiguration();
   }

   // Use withDefault() instead of direct construction.
   private ReplicationPrimaryPolicyConfiguration() {
   }

   @Override
   public TYPE getType() {
      return TYPE.PRIMARY;
   }

   public String getGroupName() {
      return groupName;
   }

   public ReplicationPrimaryPolicyConfiguration setGroupName(String groupName) {
      this.groupName = groupName;
      return this;
   }

   public String getClusterName() {
      return clusterName;
   }

   public ReplicationPrimaryPolicyConfiguration setClusterName(String clusterName) {
      this.clusterName = clusterName;
      return this;
   }

   public long getInitialReplicationSyncTimeout() {
      return initialReplicationSyncTimeout;
   }

   public ReplicationPrimaryPolicyConfiguration setInitialReplicationSyncTimeout(long initialReplicationSyncTimeout) {
      this.initialReplicationSyncTimeout = initialReplicationSyncTimeout;
      return this;
   }

   public Long getRetryReplicationWait() {
      return retryReplicationWait;
   }

   public void setRetryReplicationWait(Long retryReplicationWait) {
      this.retryReplicationWait = retryReplicationWait;
   }

   public ReplicationPrimaryPolicyConfiguration setDistributedManagerConfiguration(DistributedPrimitiveManagerConfiguration configuration) {
      this.distributedManagerConfiguration = configuration;
      return this;
   }

   public DistributedPrimitiveManagerConfiguration getDistributedManagerConfiguration() {
      return distributedManagerConfiguration;
   }

   public String getCoordinationId() {
      return coordinationId;
   }

   /**
    * Stores a normalized form of {@code newCoordinationId}:
    * null input and empty input leave the current value untouched; longer inputs
    * are truncated to 16 chars; odd-length inputs are padded with '+'; finally
    * any '-' in the stored value is replaced by '.'.
    */
   public void setCoordinationId(String newCoordinationId) {
      if (newCoordinationId == null) {
         return;
      }
      final int length = newCoordinationId.length();
      String candidate = null;
      if (length >= 16) {
         candidate = newCoordinationId.substring(0, 16);
      } else if (length % 2 != 0) {
         // must be even for conversion to uuid, extend to next even
         candidate = newCoordinationId + "+";
      } else if (length > 0) {
         // even and short enough: keep as-is
         candidate = newCoordinationId;
      }
      if (candidate != null) {
         this.coordinationId = candidate;
      }
      if (this.coordinationId != null) {
         this.coordinationId = this.coordinationId.replace('-', '.');
      }
   }
}

View File

@ -69,7 +69,10 @@ import org.apache.activemq.artemis.core.config.federation.FederationQueuePolicyC
import org.apache.activemq.artemis.core.config.federation.FederationStreamConfiguration;
import org.apache.activemq.artemis.core.config.federation.FederationTransformerConfiguration;
import org.apache.activemq.artemis.core.config.federation.FederationUpstreamConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ColocatedPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.LiveOnlyPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
@ -1607,6 +1610,16 @@ public final class FileConfigurationParser extends XMLConfigurationUtil {
Element colocatedNode = (Element) colocatedNodeList.item(0);
mainConfig.setHAPolicyConfiguration(createColocatedHaPolicy(colocatedNode, true));
}
NodeList primaryNodeList = e.getElementsByTagName("primary");
if (primaryNodeList.getLength() > 0) {
Element primaryNode = (Element) primaryNodeList.item(0);
mainConfig.setHAPolicyConfiguration(createReplicationPrimaryHaPolicy(primaryNode, mainConfig));
}
NodeList backupNodeList = e.getElementsByTagName("backup");
if (backupNodeList.getLength() > 0) {
Element backupNode = (Element) backupNodeList.item(0);
mainConfig.setHAPolicyConfiguration(createReplicationBackupHaPolicy(backupNode, mainConfig));
}
} else if (haNode.getTagName().equals("shared-store")) {
NodeList masterNodeList = e.getElementsByTagName("master");
if (masterNodeList.getLength() > 0) {
@ -1699,6 +1712,67 @@ public final class FileConfigurationParser extends XMLConfigurationUtil {
return configuration;
}
/**
 * Parses a {@code <primary>} HA-policy XML element into a
 * {@link ReplicationPrimaryPolicyConfiguration}, starting from the defaults and
 * overriding each property that is present in the element.
 *
 * @param policyNode the {@code <primary>} element being parsed
 * @param config     the broker configuration, used to resolve the nested
 *                   distributed-primitive-manager settings
 * @return the populated primary replication policy configuration
 */
private ReplicationPrimaryPolicyConfiguration createReplicationPrimaryHaPolicy(Element policyNode, Configuration config) {
ReplicationPrimaryPolicyConfiguration configuration = ReplicationPrimaryPolicyConfiguration.withDefault();
configuration.setGroupName(getString(policyNode, "group-name", configuration.getGroupName(), Validators.NO_CHECK));
configuration.setClusterName(getString(policyNode, "cluster-name", configuration.getClusterName(), Validators.NO_CHECK));
configuration.setInitialReplicationSyncTimeout(getLong(policyNode, "initial-replication-sync-timeout", configuration.getInitialReplicationSyncTimeout(), Validators.GT_ZERO));
configuration.setRetryReplicationWait(getLong(policyNode, "retry-replication-wait", configuration.getRetryReplicationWait(), Validators.GT_ZERO));
configuration.setDistributedManagerConfiguration(createDistributedPrimitiveManagerConfiguration(policyNode, config));
// coordination-id must be non-empty when present (NOT_NULL_OR_EMPTY validator).
configuration.setCoordinationId(getString(policyNode, "coordination-id", configuration.getCoordinationId(), Validators.NOT_NULL_OR_EMPTY));
return configuration;
}
/**
 * Parses a {@code <backup>} HA-policy XML element into a
 * {@link ReplicationBackupPolicyConfiguration}, starting from the defaults and
 * overriding each property that is present in the element.
 *
 * @param policyNode the {@code <backup>} element being parsed
 * @param config     the broker configuration, used to resolve the nested
 *                   distributed-primitive-manager settings
 * @return the populated backup replication policy configuration
 */
private ReplicationBackupPolicyConfiguration createReplicationBackupHaPolicy(Element policyNode, Configuration config) {
ReplicationBackupPolicyConfiguration configuration = ReplicationBackupPolicyConfiguration.withDefault();
configuration.setGroupName(getString(policyNode, "group-name", configuration.getGroupName(), Validators.NO_CHECK));
configuration.setAllowFailBack(getBoolean(policyNode, "allow-failback", configuration.isAllowFailBack()));
configuration.setInitialReplicationSyncTimeout(getLong(policyNode, "initial-replication-sync-timeout", configuration.getInitialReplicationSyncTimeout(), Validators.GT_ZERO));
configuration.setClusterName(getString(policyNode, "cluster-name", configuration.getClusterName(), Validators.NO_CHECK));
// -1 is a legal value here (MINUS_ONE_OR_GE_ZERO validator).
configuration.setMaxSavedReplicatedJournalsSize(getInteger(policyNode, "max-saved-replicated-journals-size", configuration.getMaxSavedReplicatedJournalsSize(), Validators.MINUS_ONE_OR_GE_ZERO));
configuration.setRetryReplicationWait(getLong(policyNode, "retry-replication-wait", configuration.getRetryReplicationWait(), Validators.GT_ZERO));
configuration.setDistributedManagerConfiguration(createDistributedPrimitiveManagerConfiguration(policyNode, config));
return configuration;
}
/**
 * Builds the distributed-primitive-manager configuration from the {@code <manager>}
 * child of the given policy element.
 * <p>
 * The implementation class name falls back to
 * {@link ActiveMQDefaultConfiguration#getDefaultDistributedPrimitiveManagerClassName()}
 * when absent; {@code <property key=".." value=".."/>} children under
 * {@code <properties>} are collected into a string map (empty when absent).
 * NOTE(review): assumes a {@code <manager>} element exists — {@code item(0)} is
 * dereferenced without a null check; confirm the XSD guarantees its presence.
 *
 * @param policyNode the enclosing HA-policy element
 * @param config     the broker configuration (currently unused here)
 * @return the manager class name plus its property map
 */
private DistributedPrimitiveManagerConfiguration createDistributedPrimitiveManagerConfiguration(Element policyNode, Configuration config) {
   final Element manager = (Element) policyNode.getElementsByTagName("manager").item(0);
   final String managerClassName = getString(manager, "class-name",
                                             ActiveMQDefaultConfiguration.getDefaultDistributedPrimitiveManagerClassName(),
                                             Validators.NO_CHECK);
   final Map<String, String> managerProperties = new HashMap<>();
   if (parameterExists(manager, "properties")) {
      final NodeList propertyNodes = manager.getElementsByTagName("property");
      for (int i = 0, count = propertyNodes.getLength(); i < count; i++) {
         final Element property = (Element) propertyNodes.item(i);
         managerProperties.put(property.getAttributeNode("key").getValue(),
                               property.getAttributeNode("value").getValue());
      }
   }
   return new DistributedPrimitiveManagerConfiguration(managerClassName, managerProperties);
}
private SharedStoreMasterPolicyConfiguration createSharedStoreMasterHaPolicy(Element policyNode) {
SharedStoreMasterPolicyConfiguration configuration = new SharedStoreMasterPolicyConfiguration();

View File

@ -4204,6 +4204,17 @@ public class ActiveMQServerControlImpl extends AbstractControl implements Active
return server.getNodeID() == null ? null : server.getNodeID().toString();
}
@Override
public long getActivationSequence() {
// Management accessor for the coordinated activation sequence tracked by the NodeManager.
if (AuditLogger.isBaseLoggingEnabled()) {
// Record the management access for auditing before reading the value.
AuditLogger.getActivationSequence(this.server);
}
if (server.getNodeManager() != null) {
return server.getNodeManager().getNodeActivationSequence();
}
// No NodeManager available (e.g. server not fully initialized): report 0.
return 0;
}
@Override
public String getManagementNotificationAddress() {
if (AuditLogger.isBaseLoggingEnabled()) {

View File

@ -43,7 +43,8 @@ public class ReplicationStartSyncMessage extends PacketImpl {
public enum SyncDataType {
JournalBindings(AbstractJournalStorageManager.JournalContent.BINDINGS.typeByte),
JournalMessages(AbstractJournalStorageManager.JournalContent.MESSAGES.typeByte),
LargeMessages((byte) 2);
LargeMessages((byte) 2),
ActivationSequence((byte) 3);
private byte code;
@ -62,6 +63,9 @@ public class ReplicationStartSyncMessage extends PacketImpl {
return JournalMessages;
if (code == LargeMessages.code)
return LargeMessages;
if (code == ActivationSequence.code)
return ActivationSequence;
throw new InvalidParameterException("invalid byte: " + code);
}
}
@ -80,6 +84,14 @@ public class ReplicationStartSyncMessage extends PacketImpl {
nodeID = ""; // this value will be ignored
}
public ReplicationStartSyncMessage(String nodeID, long nodeDataVersion) {
this(nodeID);
ids = new long[1];
ids[0] = nodeDataVersion;
dataType = SyncDataType.ActivationSequence;
}
public ReplicationStartSyncMessage(String nodeID) {
this();
synchronizationIsFinished = true;
@ -118,10 +130,6 @@ public class ReplicationStartSyncMessage extends PacketImpl {
DataConstants.SIZE_BOOLEAN + // buffer.writeBoolean(allowsAutoFailBack);
nodeID.length() * 3; // buffer.writeString(nodeID); -- an estimate
if (synchronizationIsFinished) {
return size;
}
size += DataConstants.SIZE_BYTE + // buffer.writeByte(dataType.code);
DataConstants.SIZE_INT + // buffer.writeInt(ids.length);
DataConstants.SIZE_LONG * ids.length; // the write loop
@ -135,8 +143,6 @@ public class ReplicationStartSyncMessage extends PacketImpl {
buffer.writeBoolean(synchronizationIsFinished);
buffer.writeBoolean(allowsAutoFailBack);
buffer.writeString(nodeID);
if (synchronizationIsFinished)
return;
buffer.writeByte(dataType.code);
buffer.writeInt(ids.length);
for (long id : ids) {
@ -149,9 +155,6 @@ public class ReplicationStartSyncMessage extends PacketImpl {
synchronizationIsFinished = buffer.readBoolean();
allowsAutoFailBack = buffer.readBoolean();
nodeID = buffer.readString();
if (synchronizationIsFinished) {
return;
}
dataType = SyncDataType.getDataType(buffer.readByte());
int length = buffer.readInt();
ids = new long[length];

View File

@ -37,7 +37,6 @@ import org.apache.activemq.artemis.api.core.Interceptor;
import org.apache.activemq.artemis.api.core.Message;
import org.apache.activemq.artemis.api.core.SimpleString;
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.io.IOCriticalErrorListener;
import org.apache.activemq.artemis.core.io.SequentialFile;
import org.apache.activemq.artemis.core.journal.EncoderPersister;
import org.apache.activemq.artemis.core.journal.Journal;
@ -82,9 +81,8 @@ import org.apache.activemq.artemis.core.replication.ReplicationManager.ADD_OPERA
import org.apache.activemq.artemis.core.server.ActiveMQComponent;
import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum;
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
import org.apache.activemq.artemis.utils.actors.OrderedExecutorFactory;
import org.jboss.logging.Logger;
@ -94,12 +92,20 @@ import org.jboss.logging.Logger;
*/
public final class ReplicationEndpoint implements ChannelHandler, ActiveMQComponent {
public interface ReplicationEndpointEventListener {
void onRemoteBackupUpToDate();
void onLiveStopping(ReplicationLiveIsStoppingMessage.LiveStopping message) throws ActiveMQException;
void onLiveNodeId(String nodeId);
}
private static final Logger logger = Logger.getLogger(ReplicationEndpoint.class);
private final IOCriticalErrorListener criticalErrorListener;
private final ActiveMQServerImpl server;
private final boolean wantedFailBack;
private final SharedNothingBackupActivation activation;
private final ReplicationEndpointEventListener eventListener;
private final boolean noSync = false;
private Channel channel;
private boolean supportResponseBatching;
@ -129,8 +135,6 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
private boolean deletePages = true;
private volatile boolean started;
private SharedNothingBackupQuorum backupQuorum;
private Executor executor;
private List<Interceptor> outgoingInterceptors = null;
@ -140,13 +144,11 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
// Constructors --------------------------------------------------
public ReplicationEndpoint(final ActiveMQServerImpl server,
IOCriticalErrorListener criticalErrorListener,
boolean wantedFailBack,
SharedNothingBackupActivation activation) {
ReplicationEndpointEventListener eventListener) {
this.server = server;
this.criticalErrorListener = criticalErrorListener;
this.wantedFailBack = wantedFailBack;
this.activation = activation;
this.eventListener = eventListener;
this.pendingPackets = new ArrayDeque<>();
this.supportResponseBatching = false;
}
@ -287,7 +289,7 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
* @throws ActiveMQException
*/
private void handleLiveStopping(ReplicationLiveIsStoppingMessage packet) throws ActiveMQException {
activation.remoteFailOver(packet.isFinalMessage());
eventListener.onLiveStopping(packet.isFinalMessage());
}
@Override
@ -474,14 +476,14 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
}
journalsHolder = null;
backupQuorum.liveIDSet(liveID);
activation.setRemoteBackupUpToDate();
eventListener.onLiveNodeId(liveID);
eventListener.onRemoteBackupUpToDate();
if (logger.isTraceEnabled()) {
logger.trace("Backup is synchronized / BACKUP-SYNC-DONE");
}
ActiveMQServerLogger.LOGGER.backupServerSynched(server);
ActiveMQServerLogger.LOGGER.backupServerSynchronized(server, liveID);
return;
}
@ -558,6 +560,11 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
return replicationResponseMessage;
if (packet.isSynchronizationFinished()) {
if (packet.getFileIds() != null && packet.getFileIds().length == 1) {
// this is the version sequence of the data we are replicating
// verified if we activate with this data
server.getNodeManager().writeNodeActivationSequence(packet.getFileIds()[0]);
}
finishSynchronization(packet.getNodeID());
replicationResponseMessage.setSynchronizationIsFinishedAcknowledgement(true);
return replicationResponseMessage;
@ -597,7 +604,7 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
if (packet.getNodeID() != null) {
// At the start of replication, we still do not know which is the nodeID that the live uses.
// This is the point where the backup gets this information.
backupQuorum.liveIDSet(packet.getNodeID());
eventListener.onLiveNodeId(packet.getNodeID());
}
break;
@ -900,16 +907,6 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
}
}
/**
* Sets the quorumManager used by the server in the replicationEndpoint. It is used to inform the
* backup server of the live's nodeID.
*
* @param backupQuorum
*/
public void setBackupQuorum(SharedNothingBackupQuorum backupQuorum) {
this.backupQuorum = backupQuorum;
}
/**
* @param executor2
*/

View File

@ -821,7 +821,7 @@ public final class ReplicationManager implements ActiveMQComponent {
}
synchronizationIsFinishedAcknowledgement.countUp();
sendReplicatePacket(new ReplicationStartSyncMessage(nodeID));
sendReplicatePacket(new ReplicationStartSyncMessage(nodeID, server.getNodeManager().getNodeActivationSequence()));
try {
if (!synchronizationIsFinishedAcknowledgement.await(initialReplicationSyncTimeout)) {
ActiveMQReplicationTimeooutException exception = ActiveMQMessageBundle.BUNDLE.replicationSynchronizationTimeout(initialReplicationSyncTimeout);

View File

@ -40,7 +40,6 @@ import org.apache.activemq.artemis.core.persistence.OperationContext;
import org.apache.activemq.artemis.core.persistence.StorageManager;
import org.apache.activemq.artemis.core.postoffice.PostOffice;
import org.apache.activemq.artemis.core.remoting.server.RemotingService;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.replication.ReplicationManager;
import org.apache.activemq.artemis.core.security.Role;
import org.apache.activemq.artemis.core.security.SecurityAuth;
@ -166,11 +165,6 @@ public interface ActiveMQServer extends ServiceComponent {
CriticalAnalyzer getCriticalAnalyzer();
/**
* @return
*/
ReplicationEndpoint getReplicationEndpoint();
/**
* it will release hold a lock for the activation.
*/

View File

@ -190,8 +190,8 @@ public interface ActiveMQServerLogger extends BasicLogger {
void errorStoppingConnectorService(@Cause Throwable e, String name);
@LogMessage(level = Logger.Level.INFO)
@Message(id = 221024, value = "Backup server {0} is synchronized with live-server.", format = Message.Format.MESSAGE_FORMAT)
void backupServerSynched(ActiveMQServerImpl server);
@Message(id = 221024, value = "Backup server {0} is synchronized with live server, nodeID={1}.", format = Message.Format.MESSAGE_FORMAT)
void backupServerSynchronized(ActiveMQServerImpl server, String liveID);
@LogMessage(level = Logger.Level.INFO)
@Message(id = 221025, value = "Replication: sending {0} (size={1}) to replica.", format = Message.Format.MESSAGE_FORMAT)

View File

@ -21,7 +21,6 @@ import org.apache.activemq.artemis.api.core.Pair;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.api.core.client.ClusterTopologyListener;
import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal;
import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum;
/**
* A class that will locate a particular live server running in a cluster. How this live is chosen
@ -31,16 +30,23 @@ import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBacku
*/
public abstract class LiveNodeLocator implements ClusterTopologyListener {
private SharedNothingBackupQuorum backupQuorum;
@FunctionalInterface
public interface BackupRegistrationListener {
public LiveNodeLocator(SharedNothingBackupQuorum backupQuorum) {
this.backupQuorum = backupQuorum;
void onBackupRegistrationFailed(boolean alreadyReplicating);
}
private final BackupRegistrationListener backupRegistrationListener;
public LiveNodeLocator(BackupRegistrationListener backupRegistrationListener) {
this.backupRegistrationListener = backupRegistrationListener;
}
/**
* Use this constructor when the LiveNodeLocator is used for scaling down rather than replicating
*/
public LiveNodeLocator() {
this(null);
}
/**
@ -67,12 +73,8 @@ public abstract class LiveNodeLocator implements ClusterTopologyListener {
* tells the locator the the current connector has failed.
*/
public void notifyRegistrationFailed(boolean alreadyReplicating) {
if (backupQuorum != null) {
if (alreadyReplicating) {
backupQuorum.notifyAlreadyReplicating();
} else {
backupQuorum.notifyRegistrationFailed();
}
if (backupRegistrationListener != null) {
backupRegistrationListener.onBackupRegistrationFailed(alreadyReplicating);
}
}

View File

@ -39,6 +39,7 @@ public abstract class NodeManager implements ActiveMQComponent {
private UUID uuid;
private boolean isStarted = false;
private final Set<FileLockNodeManager.LockListener> lockListeners;
protected long nodeActivationSequence; // local version of a coordinated sequence, tracking state transitions of ownership
public NodeManager(final boolean replicatedBackup) {
this.replicatedBackup = replicatedBackup;
@ -79,8 +80,30 @@ public abstract class NodeManager implements ActiveMQComponent {
}
}
/**
 * Reads the persisted node activation sequence.
 * <p>
 * Base implementation is a placeholder: it always throws
 * {@link UnsupportedOperationException}. Subclasses that support coordinated
 * replication are expected to override it (see the TODO below about making it abstract).
 *
 * @throws NodeManagerException declared for overriding implementations
 */
public long readNodeActivationSequence() throws NodeManagerException {
// TODO make it abstract
throw new UnsupportedOperationException("TODO");
}
/**
 * Persists the given node activation sequence.
 * <p>
 * Base implementation is a placeholder: it always throws
 * {@link UnsupportedOperationException}. Subclasses that support coordinated
 * replication are expected to override it (see the TODO below about making it abstract).
 *
 * @param version the activation sequence to persist
 * @throws NodeManagerException declared for overriding implementations
 */
public void writeNodeActivationSequence(long version) throws NodeManagerException {
// TODO make it abstract
throw new UnsupportedOperationException("TODO");
}
public abstract SimpleString readNodeId() throws NodeManagerException;
/**
 * @return the in-memory node activation sequence; reads under {@code nodeIDGuard}
 *         for visibility with {@code setNodeActivationSequence}
 */
public long getNodeActivationSequence() {
synchronized (nodeIDGuard) {
return nodeActivationSequence;
}
}
/**
 * Updates the in-memory node activation sequence; writes under {@code nodeIDGuard}
 * so readers of {@code getNodeActivationSequence} see the new value.
 *
 * @param activationSequence the new sequence value
 */
public void setNodeActivationSequence(long activationSequence) {
synchronized (nodeIDGuard) {
nodeActivationSequence = activationSequence;
}
}
public UUID getUUID() {
synchronized (nodeIDGuard) {
return uuid;

View File

@ -80,10 +80,16 @@ public class ClusterController implements ActiveMQComponent {
private boolean started;
private SimpleString replicatedClusterName;
public ClusterController(ActiveMQServer server, ScheduledExecutorService scheduledExecutor) {
public ClusterController(ActiveMQServer server,
ScheduledExecutorService scheduledExecutor,
boolean useQuorumManager) {
this.server = server;
executor = server.getExecutorFactory().getExecutor();
quorumManager = new QuorumManager(scheduledExecutor, this);
quorumManager = useQuorumManager ? new QuorumManager(scheduledExecutor, this) : null;
}
public ClusterController(ActiveMQServer server, ScheduledExecutorService scheduledExecutor) {
this(server, scheduledExecutor, true);
}
@Override
@ -108,11 +114,11 @@ public class ClusterController implements ActiveMQComponent {
//latch so we know once we are connected
replicationClusterConnectedLatch = new CountDownLatch(1);
//and add the quorum manager as a topology listener
if (defaultLocator != null) {
defaultLocator.addClusterTopologyListener(quorumManager);
}
if (quorumManager != null) {
if (defaultLocator != null) {
defaultLocator.addClusterTopologyListener(quorumManager);
}
//start the quorum manager
quorumManager.start();
}
@ -126,6 +132,26 @@ public class ClusterController implements ActiveMQComponent {
}
}
/**
* It adds {@code clusterTopologyListener} to {@code defaultLocator}.
*/
public void addClusterTopologyListener(ClusterTopologyListener clusterTopologyListener) {
if (!this.started || defaultLocator == null) {
throw new IllegalStateException("the controller must be started and with a locator initialized");
}
this.defaultLocator.addClusterTopologyListener(clusterTopologyListener);
}
/**
* It remove {@code clusterTopologyListener} from {@code defaultLocator}.
*/
public void removeClusterTopologyListener(ClusterTopologyListener clusterTopologyListener) {
if (!this.started || defaultLocator == null) {
throw new IllegalStateException("the controller must be started and with a locator initialized");
}
this.defaultLocator.removeClusterTopologyListener(clusterTopologyListener);
}
@Override
public void stop() throws Exception {
if (logger.isDebugEnabled()) {
@ -138,7 +164,9 @@ public class ClusterController implements ActiveMQComponent {
serverLocatorInternal.close();
}
//stop the quorum manager
quorumManager.stop();
if (quorumManager != null) {
quorumManager.stop();
}
}
@Override
@ -223,6 +251,17 @@ public class ClusterController implements ActiveMQComponent {
}
}
/**
* add a cluster listener
*
* @param listener
*/
public void removeClusterTopologyListenerForReplication(ClusterTopologyListener listener) {
if (replicationLocator != null) {
replicationLocator.removeClusterTopologyListener(listener);
}
}
/**
* add an interceptor
*
@ -232,6 +271,15 @@ public class ClusterController implements ActiveMQComponent {
replicationLocator.addIncomingInterceptor(interceptor);
}
/**
* remove an interceptor
*
* @param interceptor
*/
public void removeIncomingInterceptorForReplication(Interceptor interceptor) {
replicationLocator.removeIncomingInterceptor(interceptor);
}
/**
* connect to a specific node in the cluster used for replication
*
@ -406,7 +454,11 @@ public class ClusterController implements ActiveMQComponent {
logger.debug("there is no acceptor used configured at the CoreProtocolManager " + this);
}
} else if (packet.getType() == PacketImpl.QUORUM_VOTE) {
quorumManager.handleQuorumVote(clusterChannel, packet);
if (quorumManager != null) {
quorumManager.handleQuorumVote(clusterChannel, packet);
} else {
logger.warnf("Received %s on a cluster connection that's using the new quorum vote algorithm.", packet);
}
} else if (packet.getType() == PacketImpl.SCALEDOWN_ANNOUNCEMENT) {
ScaleDownAnnounceMessage message = (ScaleDownAnnounceMessage) packet;
//we don't really need to check as it should always be true

View File

@ -157,7 +157,7 @@ public class ClusterManager implements ActiveMQComponent {
final ManagementService managementService,
final Configuration configuration,
final NodeManager nodeManager,
final boolean backup) {
final boolean useQuorumManager) {
this.executorFactory = executorFactory;
executor = executorFactory.getExecutor();
@ -174,7 +174,7 @@ public class ClusterManager implements ActiveMQComponent {
this.nodeManager = nodeManager;
clusterController = new ClusterController(server, scheduledExecutor);
clusterController = new ClusterController(server, scheduledExecutor, useQuorumManager);
haManager = server.getActivation().getHAManager();
}

View File

@ -57,4 +57,8 @@ public interface HAPolicy<T extends Activation> {
String getScaleDownClustername();
default boolean useQuorumManager() {
return true;
}
}

View File

@ -0,0 +1,157 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.server.cluster.ha;
import java.util.Map;
import java.util.Objects;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.io.IOCriticalErrorListener;
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
/**
 * HA policy for a replicating backup broker coordinated through a
 * {@link DistributedPrimitiveManager} (i.e. the non-quorum-manager replication scheme:
 * {@link #useQuorumManager()} returns {@code false}).
 * <p>
 * Every backup policy is paired with a companion {@link ReplicationPrimaryPolicy}:
 * either it was created from a live policy (failback case, {@link #isTryFailback()}
 * {@code true}) or it synthesizes its own failover live policy (natural-born backup).
 */
public class ReplicationBackupPolicy implements HAPolicy<ReplicationBackupActivation> {
private final ReplicationPrimaryPolicy livePolicy;
private final String groupName;
private final String clusterName;
private final int maxSavedReplicatedJournalsSize;
private final long retryReplicationWait;
private final DistributedPrimitiveManagerConfiguration managerConfiguration;
// true when this backup should try to fail back to its companion live policy
private final boolean tryFailback;
// Failback constructor: used when this backup is the companion of an existing live policy.
private ReplicationBackupPolicy(ReplicationBackupPolicyConfiguration configuration,
ReplicationPrimaryPolicy livePolicy) {
Objects.requireNonNull(livePolicy);
this.clusterName = configuration.getClusterName();
this.maxSavedReplicatedJournalsSize = configuration.getMaxSavedReplicatedJournalsSize();
this.groupName = configuration.getGroupName();
this.retryReplicationWait = configuration.getRetryReplicationWait();
this.managerConfiguration = configuration.getDistributedManagerConfiguration();
this.tryFailback = true;
this.livePolicy = livePolicy;
}
// Natural-born-backup constructor: builds its own companion failover live policy.
private ReplicationBackupPolicy(ReplicationBackupPolicyConfiguration configuration) {
this.clusterName = configuration.getClusterName();
this.maxSavedReplicatedJournalsSize = configuration.getMaxSavedReplicatedJournalsSize();
this.groupName = configuration.getGroupName();
this.retryReplicationWait = configuration.getRetryReplicationWait();
this.managerConfiguration = configuration.getDistributedManagerConfiguration();
this.tryFailback = false;
livePolicy = ReplicationPrimaryPolicy.failoverPolicy(
configuration.getInitialReplicationSyncTimeout(),
configuration.getGroupName(),
configuration.getClusterName(),
this,
configuration.isAllowFailBack(),
configuration.getDistributedManagerConfiguration());
}
public boolean isTryFailback() {
return tryFailback;
}
/**
 * Creates a backup policy whose companion live policy will not cause the broker to try to fail back.
 */
public static ReplicationBackupPolicy with(ReplicationBackupPolicyConfiguration configuration) {
return new ReplicationBackupPolicy(configuration);
}
/**
 * Creates a companion backup policy for a natural-born primary: it causes the broker to try to fail back.
 */
static ReplicationBackupPolicy failback(long retryReplicationWait,
String clusterName,
String groupName,
ReplicationPrimaryPolicy livePolicy,
DistributedPrimitiveManagerConfiguration distributedManagerConfiguration) {
return new ReplicationBackupPolicy(ReplicationBackupPolicyConfiguration.withDefault()
.setRetryReplicationWait(retryReplicationWait)
.setClusterName(clusterName)
.setGroupName(groupName)
.setDistributedManagerConfiguration(distributedManagerConfiguration),
livePolicy);
}
// Note: wasLive, activationParams and shutdownOnCriticalIO are not used by this activation.
@Override
public ReplicationBackupActivation createActivation(ActiveMQServerImpl server,
boolean wasLive,
Map<String, Object> activationParams,
IOCriticalErrorListener shutdownOnCriticalIO) throws Exception {
return new ReplicationBackupActivation(server, DistributedPrimitiveManager.newInstanceOf(
managerConfiguration.getClassName(), managerConfiguration.getProperties()), this);
}
@Override
public boolean isSharedStore() {
return false;
}
@Override
public boolean isBackup() {
return true;
}
@Override
public boolean canScaleDown() {
// Scale-down is not supported by this replication scheme.
return false;
}
@Override
public String getScaleDownGroupName() {
return null;
}
@Override
public String getScaleDownClustername() {
return null;
}
public String getClusterName() {
return clusterName;
}
@Override
public String getBackupGroupName() {
return groupName;
}
public String getGroupName() {
return groupName;
}
/**
 * @return the companion live policy this backup fails over to / fails back from
 */
public ReplicationPrimaryPolicy getLivePolicy() {
return livePolicy;
}
public int getMaxSavedReplicatedJournalsSize() {
return maxSavedReplicatedJournalsSize;
}
public long getRetryReplicationWait() {
return retryReplicationWait;
}
@Override
public boolean useQuorumManager() {
// Coordination happens through the DistributedPrimitiveManager, not the quorum manager.
return false;
}
}

View File

@ -0,0 +1,164 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.server.cluster.ha;
import java.util.Map;
import java.util.Objects;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.io.IOCriticalErrorListener;
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.ReplicationPrimaryActivation;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
/**
 * HA policy for a replicating primary (live) broker coordinated through a
 * {@link DistributedPrimitiveManager} (i.e. the non-quorum-manager replication scheme:
 * {@link #useQuorumManager()} returns {@code false}).
 * <p>
 * Every primary policy is paired with a companion {@link ReplicationBackupPolicy}:
 * either it was created from a backup policy (failover case) or it synthesizes its
 * own failback backup policy (natural-born primary).
 */
public class ReplicationPrimaryPolicy implements HAPolicy<ReplicationPrimaryActivation> {
private final ReplicationBackupPolicy backupPolicy;
private final String clusterName;
private final String groupName;
private final long initialReplicationSyncTimeout;
private final DistributedPrimitiveManagerConfiguration distributedManagerConfiguration;
// whether a returning original live is allowed to take over automatically
private final boolean allowAutoFailBack;
private final String coordinationId;
// Failover constructor: used when this live policy is the companion of an existing backup policy.
private ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration configuration,
ReplicationBackupPolicy backupPolicy,
boolean allowAutoFailBack) {
Objects.requireNonNull(backupPolicy);
clusterName = configuration.getClusterName();
groupName = configuration.getGroupName();
initialReplicationSyncTimeout = configuration.getInitialReplicationSyncTimeout();
distributedManagerConfiguration = configuration.getDistributedManagerConfiguration();
coordinationId = configuration.getCoordinationId();
this.allowAutoFailBack = allowAutoFailBack;
this.backupPolicy = backupPolicy;
}
// Natural-born-primary constructor: builds its own companion failback backup policy.
private ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration config) {
clusterName = config.getClusterName();
groupName = config.getGroupName();
coordinationId = config.getCoordinationId();
initialReplicationSyncTimeout = config.getInitialReplicationSyncTimeout();
distributedManagerConfiguration = config.getDistributedManagerConfiguration();
this.allowAutoFailBack = false;
backupPolicy = ReplicationBackupPolicy.failback(config.getRetryReplicationWait(), config.getClusterName(),
config.getGroupName(), this,
config.getDistributedManagerConfiguration());
}
/**
 * Creates a companion failing-over primary policy for a natural-born backup: it allows auto
 * fail-back only if configured to do so.
 */
static ReplicationPrimaryPolicy failoverPolicy(long initialReplicationSyncTimeout,
String groupName,
String clusterName,
ReplicationBackupPolicy replicaPolicy,
boolean allowAutoFailback,
DistributedPrimitiveManagerConfiguration distributedManagerConfiguration) {
return new ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration.withDefault()
.setInitialReplicationSyncTimeout(initialReplicationSyncTimeout)
.setGroupName(groupName)
.setClusterName(clusterName)
.setDistributedManagerConfiguration(distributedManagerConfiguration),
replicaPolicy, allowAutoFailback);
}
/**
 * Creates a primary policy that never allows auto fail-back.<br>
 * It's meant to be used for natural-born primary brokers: its backup policy is set to always try to fail back.
 */
public static ReplicationPrimaryPolicy with(ReplicationPrimaryPolicyConfiguration configuration) {
return new ReplicationPrimaryPolicy(configuration);
}
public ReplicationBackupPolicy getBackupPolicy() {
return backupPolicy;
}
// Note: wasLive, activationParams and shutdownOnCriticalIO are not used by this activation.
@Override
public ReplicationPrimaryActivation createActivation(ActiveMQServerImpl server,
boolean wasLive,
Map<String, Object> activationParams,
IOCriticalErrorListener shutdownOnCriticalIO) throws Exception {
return new ReplicationPrimaryActivation(server,
DistributedPrimitiveManager.newInstanceOf(
distributedManagerConfiguration.getClassName(),
distributedManagerConfiguration.getProperties()), this);
}
@Override
public boolean isSharedStore() {
return false;
}
@Override
public boolean isBackup() {
return false;
}
@Override
public boolean isWaitForActivation() {
return true;
}
@Override
public boolean canScaleDown() {
// Scale-down is not supported by this replication scheme.
return false;
}
@Override
public String getBackupGroupName() {
return groupName;
}
@Override
public String getScaleDownGroupName() {
return null;
}
@Override
public String getScaleDownClustername() {
return null;
}
public boolean isAllowAutoFailBack() {
return allowAutoFailBack;
}
public String getClusterName() {
return clusterName;
}
public long getInitialReplicationSyncTimeout() {
return initialReplicationSyncTimeout;
}
public String getGroupName() {
return groupName;
}
@Override
public boolean useQuorumManager() {
// Coordination happens through the DistributedPrimitiveManager, not the quorum manager.
return false;
}
/**
 * @return the normalized coordination id, or {@code null} if none was configured
 */
public String getCoordinationId() {
return coordinationId;
}
}

View File

@ -28,11 +28,12 @@ import org.apache.activemq.artemis.core.client.impl.Topology;
import org.apache.activemq.artemis.core.protocol.core.CoreRemotingConnection;
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
import org.apache.activemq.artemis.core.server.LiveNodeLocator.BackupRegistrationListener;
import org.apache.activemq.artemis.core.server.NetworkHealthCheck;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.jboss.logging.Logger;
public class SharedNothingBackupQuorum implements Quorum, SessionFailureListener {
public class SharedNothingBackupQuorum implements Quorum, SessionFailureListener, BackupRegistrationListener {
private static final Logger LOGGER = Logger.getLogger(SharedNothingBackupQuorum.class);
@ -236,13 +237,9 @@ public class SharedNothingBackupQuorum implements Quorum, SessionFailureListener
}
}
public void notifyRegistrationFailed() {
signal = BACKUP_ACTIVATION.FAILURE_REPLICATING;
latch.countDown();
}
public void notifyAlreadyReplicating() {
signal = BACKUP_ACTIVATION.ALREADY_REPLICATING;
// BackupRegistrationListener callback: records why registration with the live broker failed
// and releases whoever is blocked on the activation latch so it can react to the signal.
@Override
public void onBackupRegistrationFailed(boolean alreadyReplicating) {
// ALREADY_REPLICATING: the live already has a replicating backup; otherwise a generic registration failure.
signal = alreadyReplicating ? BACKUP_ACTIVATION.ALREADY_REPLICATING : BACKUP_ACTIVATION.FAILURE_REPLICATING;
latch.countDown();
}

View File

@ -36,6 +36,7 @@ public class FileMoveManager {
private static final Logger logger = Logger.getLogger(FileMoveManager.class);
private final File folder;
private final String[] prefixesToPreserve;
private int maxFolders;
public static final String PREFIX = "oldreplica.";
@ -70,9 +71,10 @@ public class FileMoveManager {
this(folder, -1);
}
public FileMoveManager(File folder, int maxFolders) {
/**
 * @param folder             the data folder managed by this mover
 * @param maxFolders         max number of saved replica folders to keep (see doMove for interpretation)
 * @param prefixesToPreserve file-name prefixes that must be preserved (copied/skipped) instead of moved/deleted
 */
public FileMoveManager(File folder, int maxFolders, String... prefixesToPreserve) {
this.folder = folder;
this.maxFolders = maxFolders;
// defensive copy so callers cannot mutate our view of the prefixes afterwards
this.prefixesToPreserve = prefixesToPreserve != null ? Arrays.copyOf(prefixesToPreserve, prefixesToPreserve.length) : null;
}
public int getMaxFolders() {
@ -99,8 +101,23 @@ public class FileMoveManager {
ActiveMQServerLogger.LOGGER.backupDeletingData(folder.getPath());
for (String fileMove : files) {
File fileFrom = new File(folder, fileMove);
logger.tracef("deleting %s", fileFrom);
deleteTree(fileFrom);
if (prefixesToPreserve != null) {
boolean skip = false;
for (String prefixToPreserve : prefixesToPreserve) {
if (fileMove.startsWith(prefixToPreserve)) {
logger.tracef("skipping %s", fileFrom);
skip = true;
break;
}
}
if (!skip) {
logger.tracef("deleting %s", fileFrom);
deleteTree(fileFrom);
}
} else {
logger.tracef("deleting %s", fileFrom);
deleteTree(fileFrom);
}
}
} else {
// Since we will create one folder, we are already taking that one into consideration
@ -113,8 +130,26 @@ public class FileMoveManager {
for (String fileMove : files) {
File fileFrom = new File(folder, fileMove);
File fileTo = new File(folderTo, fileMove);
logger.tracef("doMove:: moving %s as %s", fileFrom, fileTo);
Files.move(fileFrom.toPath(), fileTo.toPath());
if (prefixesToPreserve != null) {
boolean copy = false;
for (String prefixToPreserve : prefixesToPreserve) {
if (fileMove.startsWith(prefixToPreserve)) {
logger.tracef("skipping %s", fileFrom);
copy = true;
break;
}
}
if (copy) {
logger.tracef("copying %s to %s", fileFrom, fileTo);
Files.copy(fileFrom.toPath(), fileTo.toPath());
} else {
logger.tracef("doMove:: moving %s as %s", fileFrom, fileTo);
Files.move(fileFrom.toPath(), fileTo.toPath());
}
} else {
logger.tracef("doMove:: moving %s as %s", fileFrom, fileTo);
Files.move(fileFrom.toPath(), fileTo.toPath());
}
}
}

View File

@ -110,4 +110,8 @@ public abstract class Activation implements Runnable {
public ReplicationManager getReplicationManager() {
return null;
}
/**
 * Whether this activation's replica is in sync with the live broker.
 * Base implementation always reports {@code false}; replication-aware activations override it.
 */
public boolean isReplicaSync() {
return false;
}
}

View File

@ -109,7 +109,6 @@ import org.apache.activemq.artemis.core.postoffice.impl.LocalQueueBinding;
import org.apache.activemq.artemis.core.postoffice.impl.PostOfficeImpl;
import org.apache.activemq.artemis.core.remoting.server.RemotingService;
import org.apache.activemq.artemis.core.remoting.server.impl.RemotingServiceImpl;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.replication.ReplicationManager;
import org.apache.activemq.artemis.core.security.CheckType;
import org.apache.activemq.artemis.core.security.Role;
@ -660,7 +659,7 @@ public class ActiveMQServerImpl implements ActiveMQServer {
afterActivationCreated.run();
} catch (Throwable e) {
logger.warn(e.getMessage(), e); // just debug, this is not supposed to happend, and if it does
// it will be embedeed code from tests
// it will be embedded code from tests
}
afterActivationCreated = null;
}
@ -797,14 +796,6 @@ public class ActiveMQServerImpl implements ActiveMQServer {
}
}
@Override
public ReplicationEndpoint getReplicationEndpoint() {
if (activation instanceof SharedNothingBackupActivation) {
return ((SharedNothingBackupActivation) activation).getReplicationEndpoint();
}
return null;
}
@Override
public void unlockActivation() {
activationLock.release();
@ -921,7 +912,7 @@ public class ActiveMQServerImpl implements ActiveMQServer {
return threadPool;
}
public void setActivation(SharedNothingLiveActivation activation) {
public void setActivation(Activation activation) {
this.activation = activation;
}
@ -1145,19 +1136,7 @@ public class ActiveMQServerImpl implements ActiveMQServer {
@Override
public boolean isReplicaSync() {
if (activation instanceof SharedNothingLiveActivation) {
ReplicationManager replicationManager = getReplicationManager();
if (replicationManager == null) {
return false;
} else {
return !replicationManager.isSynchronizing();
}
} else if (activation instanceof SharedNothingBackupActivation) {
return ((SharedNothingBackupActivation) activation).isRemoteBackupUpToDate();
} else {
return false;
}
return activation.isReplicaSync();
}
public void stop(boolean failoverOnServerShutdown, final boolean criticalIOError, boolean restarting) {
@ -2898,6 +2877,8 @@ public class ActiveMQServerImpl implements ActiveMQServer {
public String toString() {
if (identity != null) {
return "ActiveMQServerImpl::" + identity;
} else if (configuration != null && configuration.getName() != null) {
return "ActiveMQServerImpl::" + "name=" + configuration.getName();
}
return "ActiveMQServerImpl::" + (nodeManager != null ? "serverUUID=" + nodeManager.getUUID() : "");
}
@ -3116,7 +3097,7 @@ public class ActiveMQServerImpl implements ActiveMQServer {
postOffice = new PostOfficeImpl(this, storageManager, pagingManager, queueFactory, managementService, configuration.getMessageExpiryScanPeriod(), configuration.getAddressQueueScanPeriod(), configuration.getWildcardConfiguration(), configuration.getIDCacheSize(), configuration.isPersistIDCache(), addressSettingsRepository);
// This can't be created until node id is set
clusterManager = new ClusterManager(executorFactory, this, postOffice, scheduledPool, managementService, configuration, nodeManager, haPolicy.isBackup());
clusterManager = new ClusterManager(executorFactory, this, postOffice, scheduledPool, managementService, configuration, nodeManager, haPolicy.useQuorumManager());
federationManager = new FederationManager(this);
@ -4191,10 +4172,16 @@ public class ActiveMQServerImpl implements ActiveMQServer {
* move any older data away and log a warning about it.
*/
void moveServerData(int maxSavedReplicated) throws IOException {
moveServerData(maxSavedReplicated, false);
}
void moveServerData(int maxSavedReplicated, boolean preserveLockFiles) throws IOException {
File[] dataDirs = new File[]{configuration.getBindingsLocation(), configuration.getJournalLocation(), configuration.getPagingLocation(), configuration.getLargeMessagesLocation()};
for (File data : dataDirs) {
FileMoveManager moveManager = new FileMoveManager(data, maxSavedReplicated);
final boolean isLockFolder = preserveLockFiles ? data.equals(configuration.getNodeManagerLockLocation()) : false;
final String[] lockPrefixes = isLockFolder ? new String[]{FileBasedNodeManager.SERVER_LOCK_NAME, "serverlock"} : null;
FileMoveManager moveManager = new FileMoveManager(data, maxSavedReplicated, lockPrefixes);
moveManager.doMove();
}
}

View File

@ -29,7 +29,6 @@ import org.apache.activemq.artemis.api.core.Pair;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.api.core.client.TopologyMember;
import org.apache.activemq.artemis.core.server.LiveNodeLocator;
import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum;
import org.apache.activemq.artemis.utils.ConcurrentUtil;
/**
@ -47,8 +46,9 @@ public class AnyLiveNodeLocatorForReplication extends LiveNodeLocator {
private String nodeID;
public AnyLiveNodeLocatorForReplication(SharedNothingBackupQuorum backupQuorum, ActiveMQServerImpl server, long retryReplicationWait) {
super(backupQuorum);
public AnyLiveNodeLocatorForReplication(BackupRegistrationListener backupRegistrationListener,
ActiveMQServerImpl server, long retryReplicationWait) {
super(backupRegistrationListener);
this.server = server;
this.retryReplicationWait = retryReplicationWait;
}

View File

@ -20,6 +20,7 @@ import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
@ -27,17 +28,68 @@ import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.utils.UUID;
import org.apache.activemq.artemis.utils.UUIDGenerator;
import static java.nio.file.StandardOpenOption.CREATE;
import static java.nio.file.StandardOpenOption.READ;
import static java.nio.file.StandardOpenOption.WRITE;
public abstract class FileBasedNodeManager extends NodeManager {
protected static final byte FIRST_TIME_START = '0';
public static final String SERVER_LOCK_NAME = "server.lock";
public static final String SERVER_ACTIVATION_SEQUENCE_NAME = "server.activation.sequence";
private static final String ACCESS_MODE = "rw";
private final File directory;
protected FileChannel channel;
protected FileChannel activationSequenceChannel;
public FileBasedNodeManager(boolean replicatedBackup, File directory) {
super(replicatedBackup);
this.directory = directory;
if (directory != null) {
directory.mkdirs();
}
}
/**
 * Lazily opens the file channel backing the activation sequence file; no-op if already open.
 * The file is created on first use (READ, WRITE, CREATE).
 *
 * @throws IOException if the channel cannot be opened
 */
protected void useActivationSequenceChannel() throws IOException {
if (activationSequenceChannel != null) {
return;
}
activationSequenceChannel = FileChannel.open(newFile(SERVER_ACTIVATION_SEQUENCE_NAME).toPath(), READ, WRITE, CREATE);
}
/**
 * Reads the node activation sequence persisted at offset 0 of the activation sequence file
 * (big-endian long). Returns 0 when the file is missing/empty, i.e. when fewer than
 * {@link Long#BYTES} bytes could be read.
 *
 * @throws NodeManagerException if the manager is not started or an I/O error occurs
 */
@Override
public long readNodeActivationSequence() throws NodeManagerException {
if (!isStarted()) {
throw new NodeManagerException(new IllegalStateException("node manager must be started first"));
}
try {
useActivationSequenceChannel();
ByteBuffer tmpBuffer = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
// NOTE(review): a partial read (< Long.BYTES) is treated the same as "no sequence written yet"
if (activationSequenceChannel.read(tmpBuffer, 0) != Long.BYTES) {
return 0;
}
tmpBuffer.flip();
return tmpBuffer.getLong(0);
} catch (IOException ie) {
throw new NodeManagerException(ie);
}
}
/**
 * Persists {@code version} as a big-endian long at offset 0 of the activation sequence file,
 * forces it to storage, then updates the in-memory sequence.
 *
 * @throws NodeManagerException if the manager is not started or an I/O error occurs
 */
@Override
public void writeNodeActivationSequence(long version) throws NodeManagerException {
if (!isStarted()) {
throw new NodeManagerException(new IllegalStateException("node manager must be started first"));
}
try {
useActivationSequenceChannel();
ByteBuffer tmpBuffer = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
tmpBuffer.putLong(0, version);
activationSequenceChannel.write(tmpBuffer, 0);
// force(false): flush file content (metadata not required) before exposing the new sequence
activationSequenceChannel.force(false);
setNodeActivationSequence(version);
} catch (IOException ie) {
throw new NodeManagerException(ie);
}
}
/**
@ -101,12 +153,8 @@ public abstract class FileBasedNodeManager extends NodeManager {
createNodeId();
}
/**
* @return
*/
protected final File newFile(final String fileName) {
File file = new File(directory, fileName);
return file;
return new File(directory, fileName);
}
protected final synchronized void createNodeId() throws IOException {
@ -137,9 +185,20 @@ public abstract class FileBasedNodeManager extends NodeManager {
@Override
public synchronized void stop() throws Exception {
FileChannel channelCopy = channel;
if (channelCopy != null)
channelCopy.close();
super.stop();
try {
if (channelCopy != null)
channelCopy.close();
} finally {
try {
FileChannel dataVersionChannel = this.activationSequenceChannel;
this.activationSequenceChannel = null;
if (dataVersionChannel != null) {
dataVersionChannel.close();
}
} finally {
super.stop();
}
}
}
@Override

View File

@ -0,0 +1,127 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.server.impl;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.Queue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.api.core.Pair;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.api.core.client.TopologyMember;
import org.apache.activemq.artemis.core.server.LiveNodeLocator;
import org.apache.activemq.artemis.utils.ConcurrentUtil;
/**
* It looks for a live server in the cluster with a specific NodeID
*/
/**
 * Looks for a live server in the cluster with a specific NodeID.
 * <p>
 * Thread-safety: all mutable state is guarded by {@code lock}; waiting threads are woken via
 * {@code condition} when a matching live node appears in the topology.
 */
public class NamedLiveNodeIdLocatorForReplication extends LiveNodeLocator {

   private final Lock lock = new ReentrantLock();
   private final Condition condition = lock.newCondition();
   private final String nodeID;
   private final long retryReplicationWait;
   // candidates still to try, in discovery order
   private final Queue<Pair<TransportConfiguration, TransportConfiguration>> liveConfigurations = new LinkedList<>();
   // candidates already tried and failed; recycled back into liveConfigurations while waiting
   private final ArrayList<Pair<TransportConfiguration, TransportConfiguration>> triedConfigurations = new ArrayList<>();
   private boolean found;

   /**
    * @param nodeID                     the NodeID of the live server to locate
    * @param backupRegistrationListener forwarded to the base locator for registration-failure events
    * @param retryReplicationWait       milliseconds to wait between retries while locating
    */
   public NamedLiveNodeIdLocatorForReplication(String nodeID,
                                               BackupRegistrationListener backupRegistrationListener,
                                               long retryReplicationWait) {
      super(backupRegistrationListener);
      this.nodeID = nodeID;
      this.retryReplicationWait = retryReplicationWait;
   }

   @Override
   public void locateNode() throws ActiveMQException {
      locateNode(-1L);
   }

   /**
    * Blocks until a live configuration is available. With {@code timeout == -1} it waits forever,
    * periodically recycling previously tried configurations; otherwise it waits at most once for
    * {@code timeout}.
    */
   @Override
   public void locateNode(long timeout) throws ActiveMQException {
      // acquire the lock BEFORE the try: if lock() could not complete, running unlock() in the
      // finally would throw IllegalMonitorStateException and mask the original failure
      lock.lock();
      try {
         if (liveConfigurations.size() == 0) {
            try {
               if (timeout != -1L) {
                  ConcurrentUtil.await(condition, timeout);
               } else {
                  while (liveConfigurations.size() == 0) {
                     condition.await(retryReplicationWait, TimeUnit.MILLISECONDS);
                     // give previously failed candidates another chance on the next pass
                     liveConfigurations.addAll(triedConfigurations);
                     triedConfigurations.clear();
                  }
               }
            } catch (InterruptedException e) {
               // deliberately ignored: the caller retries; NOTE(review) consider restoring the interrupt flag
            }
         }
      } finally {
         lock.unlock();
      }
   }

   @Override
   public void nodeUP(TopologyMember topologyMember, boolean last) {
      lock.lock();
      try {
         if (nodeID.equals(topologyMember.getNodeId()) && topologyMember.getLive() != null) {
            Pair<TransportConfiguration, TransportConfiguration> liveConfiguration = new Pair<>(topologyMember.getLive(), topologyMember.getBackup());
            if (!liveConfigurations.contains(liveConfiguration)) {
               liveConfigurations.add(liveConfiguration);
            }
            found = true;
            condition.signal();
         }
      } finally {
         lock.unlock();
      }
   }

   @Override
   public void nodeDown(long eventUID, String nodeID) {
      //no op
   }

   @Override
   public String getNodeID() {
      // NOTE(review): plain read of 'found' outside the lock, as in the original — confirm callers tolerate staleness
      return found ? nodeID : null;
   }

   @Override
   public Pair<TransportConfiguration, TransportConfiguration> getLiveConfiguration() {
      return liveConfigurations.peek();
   }

   /**
    * Moves the head candidate into the tried list (so it can be retried later) before
    * forwarding the failure to the registered listener.
    */
   @Override
   public void notifyRegistrationFailed(boolean alreadyReplicating) {
      lock.lock();
      try {
         triedConfigurations.add(liveConfigurations.poll());
         super.notifyRegistrationFailed(alreadyReplicating);
      } finally {
         lock.unlock();
      }
   }
}

View File

@ -29,7 +29,6 @@ import org.apache.activemq.artemis.api.core.Pair;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.api.core.client.TopologyMember;
import org.apache.activemq.artemis.core.server.LiveNodeLocator;
import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum;
import org.apache.activemq.artemis.utils.ConcurrentUtil;
/**
@ -48,8 +47,10 @@ public class NamedLiveNodeLocatorForReplication extends LiveNodeLocator {
private String nodeID;
public NamedLiveNodeLocatorForReplication(String backupGroupName, SharedNothingBackupQuorum quorumManager, long retryReplicationWait) {
super(quorumManager);
public NamedLiveNodeLocatorForReplication(String backupGroupName,
BackupRegistrationListener backupRegistrationListener,
long retryReplicationWait) {
super(backupRegistrationListener);
this.backupGroupName = backupGroupName;
this.retryReplicationWait = retryReplicationWait;
}

View File

@ -0,0 +1,571 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.server.impl;
import javax.annotation.concurrent.GuardedBy;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Consumer;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.api.core.ActiveMQIllegalStateException;
import org.apache.activemq.artemis.api.core.Pair;
import org.apache.activemq.artemis.api.core.SimpleString;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.core.protocol.core.Channel;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
import org.apache.activemq.artemis.core.server.LiveNodeLocator;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.core.server.cluster.ClusterControl;
import org.apache.activemq.artemis.core.server.cluster.ClusterController;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.jboss.logging.Logger;
import static org.apache.activemq.artemis.core.server.impl.ReplicationObserver.ReplicationFailure;
import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.ensureSequentialAccessToNodeData;
import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.tryActivate;
/**
* This activation can be used by a primary while trying to fail-back ie {@code failback == true} or
* by a natural-born backup ie {@code failback == false}.<br>
*/
public final class ReplicationBackupActivation extends Activation implements DistributedPrimitiveManager.UnavailableManagerListener {
private static final Logger LOGGER = Logger.getLogger(ReplicationBackupActivation.class);
private final ReplicationBackupPolicy policy;
private final ActiveMQServerImpl activeMQServer;
// This field is != null iff this node is a primary during a fail-back ie acting as a backup in order to become live again.
private final String expectedNodeID;
@GuardedBy("this")
private boolean closed;
private final DistributedPrimitiveManager distributedManager;
// Used for monitoring purposes
private volatile ReplicationObserver replicationObserver;
// Used for testing purposes
private volatile ReplicationEndpoint replicationEndpoint;
// Used for testing purposes
private Consumer<ReplicationEndpoint> onReplicationEndpointCreation;
// Used to arbitrate a one-shot server stop/restart (compareAndSet guards the single transition)
private final AtomicBoolean stopping;
/**
 * @param activeMQServer     the server this activation drives
 * @param distributedManager coordination manager used to acquire the live lock
 * @param policy             backup policy; when {@code isTryFailback()} the server's current
 *                           NodeID must already be set, and becomes the expected live NodeID
 */
public ReplicationBackupActivation(final ActiveMQServerImpl activeMQServer,
final DistributedPrimitiveManager distributedManager,
final ReplicationBackupPolicy policy) {
this.activeMQServer = activeMQServer;
if (policy.isTryFailback()) {
// a fail-back must target the node we previously were: require a concrete NodeID
final SimpleString serverNodeID = activeMQServer.getNodeID();
if (serverNodeID == null || serverNodeID.isEmpty()) {
throw new IllegalStateException("A failback activation must be biased around a specific NodeID");
}
this.expectedNodeID = serverNodeID.toString();
} else {
this.expectedNodeID = null;
}
this.distributedManager = distributedManager;
this.policy = policy;
this.replicationObserver = null;
this.replicationEndpoint = null;
this.stopping = new AtomicBoolean(false);
}
/**
 * used for testing purposes.
 */
public DistributedPrimitiveManager getDistributedManager() {
return distributedManager;
}
// Called when the distributed (quorum) manager becomes unavailable: a backup cannot trust its
// coordination state anymore, so restart the whole server (as backup) unless already closed.
@Override
public void onUnavailableManagerEvent() {
synchronized (this) {
if (closed) {
return;
}
}
LOGGER.info("Unavailable quorum service detected: try restart server");
asyncRestartServer(activeMQServer, true);
}
/**
 * This util class exists because {@link LiveNodeLocator} need a {@link LiveNodeLocator.BackupRegistrationListener}
 * to forward backup registration failure events: this is used to switch on/off backup registration event listening
 * on an existing locator.
 */
private static final class RegistrationFailureForwarder implements LiveNodeLocator.BackupRegistrationListener, AutoCloseable {

// NO-OP sink used while no real listener is attached, so forwarding is always safe
private static final LiveNodeLocator.BackupRegistrationListener NOOP_LISTENER = ignore -> {
};
private volatile LiveNodeLocator.BackupRegistrationListener listener = NOOP_LISTENER;

// Attaches a real listener; returns this so it can be used in try-with-resources
public RegistrationFailureForwarder to(LiveNodeLocator.BackupRegistrationListener listener) {
this.listener = listener;
return this;
}
@Override
public void onBackupRegistrationFailed(boolean alreadyReplicating) {
listener.onBackupRegistrationFailed(alreadyReplicating);
}
// Detaches the listener by swapping the NO-OP sink back in
@Override
public void close() {
listener = NOOP_LISTENER;
}
}
/**
 * Activation entry point. First, if this node holds a positive activation sequence (meaning it
 * may own valuable data), it tries to win the live lock directly and start as live. Otherwise it
 * starts as a backup: resets the node manager, moves old data away, awaits a live cluster member,
 * replicates, and — if replication ends with a fail-over — becomes live itself.
 */
@Override
public void run() {
synchronized (this) {
if (closed) {
return;
}
}
try {
distributedManager.start();
final long nodeActivationSequence = activeMQServer.getNodeManager().readNodeActivationSequence();
// only a backup with positive local activation sequence could contain valuable data
if (nodeActivationSequence > 0) {
final String nodeId = activeMQServer.getNodeManager().getNodeId().toString();
DistributedLock liveLockWithInSyncReplica;
// retry loop: an UnavailableStateException is recoverable, so bounce the manager and try again
while (true) {
distributedManager.start();
try {
liveLockWithInSyncReplica = tryActivate(nodeId, nodeActivationSequence, distributedManager, LOGGER);
break;
} catch (UnavailableStateException canRecoverEx) {
distributedManager.stop();
}
}
if (liveLockWithInSyncReplica != null) {
// retain state and start as live
if (!activeMQServer.initialisePart1(false)) {
return;
}
activeMQServer.setState(ActiveMQServerImpl.SERVER_STATE.STARTED);
startAsLive(liveLockWithInSyncReplica);
return;
}
}
distributedManager.addUnavailableManagerListener(this);
// Stop the previous node manager and create a new one with NodeManager::replicatedBackup == true:
// NodeManager::start skips setting up the lock file with the NodeID, until NodeManager::stopBackup is called.
activeMQServer.resetNodeManager();
// A primary needs to preserve its NodeID across runs
activeMQServer.moveServerData(policy.getMaxSavedReplicatedJournalsSize(), policy.isTryFailback());
activeMQServer.getNodeManager().start();
if (!activeMQServer.initialisePart1(false)) {
return;
}
synchronized (this) {
if (closed)
return;
}
final ClusterController clusterController = activeMQServer.getClusterManager().getClusterController();
LOGGER.infof("Apache ActiveMQ Artemis Backup Server version %s [%s] started, awaiting connection to a live cluster member to start replication", activeMQServer.getVersion().getFullVersion(),
activeMQServer.toString());
clusterController.awaitConnectionToReplicationCluster();
activeMQServer.getBackupManager().start();
activeMQServer.setState(ActiveMQServerImpl.SERVER_STATE.STARTED);
// replicateAndFailover returns a held live lock on fail-over, or null when this node must stop/stay backup
final DistributedLock liveLock = replicateAndFailover(clusterController);
if (liveLock == null) {
return;
}
startAsLive(liveLock);
} catch (Exception e) {
if ((e instanceof InterruptedException || e instanceof IllegalStateException) && !activeMQServer.isStarted()) {
// do not log these errors if the server is being stopped.
return;
}
ActiveMQServerLogger.LOGGER.initializationError(e);
}
}
/**
 * Promotes this (former backup) server to live while holding {@code liveLock}: bumps the
 * activation sequence, writes the NodeID, starts storage/backup manager, installs a
 * {@link ReplicationPrimaryActivation} and completes activation. Any failure to keep or verify
 * the lock aborts the promotion by stopping/restarting the server.
 */
private void startAsLive(final DistributedLock liveLock) throws Exception {
activeMQServer.setHAPolicy(policy.getLivePolicy());
synchronized (activeMQServer) {
if (!activeMQServer.isStarted()) {
liveLock.close();
return;
}
try {
ensureSequentialAccessToNodeData(activeMQServer, distributedManager, LOGGER);
} catch (Throwable fatal) {
LOGGER.warn(fatal);
// policy is already live one, but there's no activation yet: we can just stop
asyncRestartServer(activeMQServer, false, false);
throw new ActiveMQIllegalStateException("This server cannot ensure sequential access to broker data: activation is failed");
}
ActiveMQServerLogger.LOGGER.becomingLive(activeMQServer);
// stopBackup is going to write the NodeID previously set on the NodeManager,
// because activeMQServer.resetNodeManager() has created a NodeManager with replicatedBackup == true.
activeMQServer.getNodeManager().stopBackup();
activeMQServer.getStorageManager().start();
activeMQServer.getBackupManager().activated();
// IMPORTANT:
// we're setting this activation JUST because it would allow the server to use its
// getActivationChannelHandler to handle replication
final ReplicationPrimaryActivation primaryActivation = new ReplicationPrimaryActivation(activeMQServer, distributedManager, policy.getLivePolicy());
liveLock.addListener(primaryActivation);
activeMQServer.setActivation(primaryActivation);
activeMQServer.initialisePart2(false);
// calling primaryActivation.stateChanged !isHeldByCaller is necessary in case the lock was unavailable
// before liveLock.addListener: just throwing an exception won't stop the broker.
final boolean stillLive;
try {
stillLive = liveLock.isHeldByCaller();
} catch (UnavailableStateException e) {
LOGGER.warn(e);
primaryActivation.onUnavailableLockEvent();
throw new ActiveMQIllegalStateException("This server cannot check its role as a live: activation is failed");
}
if (!stillLive) {
primaryActivation.onUnavailableLockEvent();
throw new ActiveMQIllegalStateException("This server is not live anymore: activation is failed");
}
if (activeMQServer.getIdentity() != null) {
ActiveMQServerLogger.LOGGER.serverIsLive(activeMQServer.getIdentity());
} else {
ActiveMQServerLogger.LOGGER.serverIsLive();
}
activeMQServer.completeActivation(true);
}
}
private LiveNodeLocator createLiveNodeLocator(final LiveNodeLocator.BackupRegistrationListener registrationListener) {
if (expectedNodeID != null) {
assert policy.isTryFailback();
return new NamedLiveNodeIdLocatorForReplication(expectedNodeID, registrationListener, policy.getRetryReplicationWait());
}
return policy.getGroupName() == null ?
new AnyLiveNodeLocatorForReplication(registrationListener, activeMQServer, policy.getRetryReplicationWait()) :
new NamedLiveNodeLocatorForReplication(policy.getGroupName(), registrationListener, policy.getRetryReplicationWait());
}
private DistributedLock replicateAndFailover(final ClusterController clusterController) throws ActiveMQException, InterruptedException {
final RegistrationFailureForwarder registrationFailureForwarder = new RegistrationFailureForwarder();
// node locator isn't stateless and contains a live-list of candidate nodes to connect too, hence
// it MUST be reused for each replicateLive attempt
final LiveNodeLocator nodeLocator = createLiveNodeLocator(registrationFailureForwarder);
clusterController.addClusterTopologyListenerForReplication(nodeLocator);
try {
while (true) {
synchronized (this) {
if (closed) {
return null;
}
}
final ReplicationFailure failure = replicateLive(clusterController, nodeLocator, registrationFailureForwarder);
if (failure == null) {
Thread.sleep(clusterController.getRetryIntervalForReplicatedCluster());
continue;
}
if (!activeMQServer.isStarted()) {
return null;
}
LOGGER.debugf("ReplicationFailure = %s", failure);
switch (failure) {
case VoluntaryFailOver:
case NonVoluntaryFailover:
// from now on we're meant to stop:
// - due to failover
// - due to restart/stop
if (!stopping.compareAndSet(false, true)) {
return null;
}
// no more interested into these events: handling it manually from here
distributedManager.removeUnavailableManagerListener(this);
final long nodeActivationSequence = activeMQServer.getNodeManager().readNodeActivationSequence();
final String nodeId = activeMQServer.getNodeManager().getNodeId().toString();
DistributedLock liveLockWithInSyncReplica = null;
if (nodeActivationSequence > 0) {
try {
liveLockWithInSyncReplica = tryActivate(nodeId, nodeActivationSequence, distributedManager, LOGGER);
} catch (Throwable error) {
// no need to retry here, can just restart as backup that will handle a more resilient tryActivate
LOGGER.warn("Errored while attempting failover", error);
liveLockWithInSyncReplica = null;
}
} else {
LOGGER.warnf("We expect local activation sequence for NodeID = %s to be > 0 on a fail-over, while is %d", nodeId, nodeActivationSequence);
}
assert stopping.get();
if (liveLockWithInSyncReplica != null) {
return liveLockWithInSyncReplica;
}
ActiveMQServerLogger.LOGGER.restartingAsBackupBasedOnQuorumVoteResults();
// let's ignore the stopping flag here, we're already in control of it
asyncRestartServer(activeMQServer, true, false);
return null;
case RegistrationError:
LOGGER.error("Stopping broker because of critical registration error");
asyncRestartServer(activeMQServer, false);
return null;
case AlreadyReplicating:
// can just retry here, data should be clean and nodeLocator
// should remove the live node that has answered this
LOGGER.info("Live broker was already replicating: retry sync with another live");
continue;
case ClosedObserver:
return null;
case BackupNotInSync:
LOGGER.info("Replication failure while initial sync not yet completed: restart as backup");
asyncRestartServer(activeMQServer, true);
return null;
case WrongNodeId:
LOGGER.error("Stopping broker because of wrong node ID communication from live: maybe a misbehaving live?");
asyncRestartServer(activeMQServer, false);
return null;
default:
throw new AssertionError("Unsupported failure " + failure);
}
}
} finally {
silentExecution("Error on cluster topology listener for replication cleanup", () -> clusterController.removeClusterTopologyListenerForReplication(nodeLocator));
}
}
/**
 * Builds the {@link ReplicationObserver} matching the current policy: a failback observer pinned
 * to {@code expectedNodeID} when a failback was requested, otherwise a plain failover observer.
 */
private ReplicationObserver replicationObserver() {
   return policy.isTryFailback() ?
      ReplicationObserver.failbackObserver(activeMQServer.getNodeManager(), activeMQServer.getBackupManager(), activeMQServer.getScheduledPool(), expectedNodeID) :
      ReplicationObserver.failoverObserver(activeMQServer.getNodeManager(), activeMQServer.getBackupManager(), activeMQServer.getScheduledPool());
}
/**
 * Performs one replication attempt against the live broker located via {@code liveLocator}.
 * <p>
 * Returns {@code null} when no live could be located/connected (caller is expected to retry),
 * otherwise the {@link ReplicationFailure} reported by the observer once replication ends.
 * The nested try/finally blocks undo registrations in the exact reverse order they were made.
 *
 * @throws ActiveMQException from node location/connection
 */
private ReplicationFailure replicateLive(final ClusterController clusterController,
                                         final LiveNodeLocator liveLocator,
                                         final RegistrationFailureForwarder registrationFailureForwarder) throws ActiveMQException {
   // both the observer and the forwarder binding are AutoCloseable and released on exit
   try (ReplicationObserver replicationObserver = replicationObserver();
        RegistrationFailureForwarder ignored = registrationFailureForwarder.to(replicationObserver)) {
      // publish the observer so isReplicaSync/close can see it while this attempt is in flight
      this.replicationObserver = replicationObserver;
      clusterController.addClusterTopologyListener(replicationObserver);
      // ReplicationError notifies backup registration failures to live locator -> forwarder -> observer
      final ReplicationError replicationError = new ReplicationError(liveLocator);
      clusterController.addIncomingInterceptorForReplication(replicationError);
      try {
         final ClusterControl liveControl = tryLocateAndConnectToLive(liveLocator, clusterController);
         if (liveControl == null) {
            // no live reachable: let the caller retry
            return null;
         }
         try {
            final ReplicationEndpoint replicationEndpoint = tryAuthorizeAndAsyncRegisterAsBackupToLive(liveControl, replicationObserver);
            if (replicationEndpoint == null) {
               return ReplicationFailure.RegistrationError;
            }
            this.replicationEndpoint = replicationEndpoint;
            try {
               // blocks until the observer reports how/why replication ended
               return replicationObserver.awaitReplicationFailure();
            } finally {
               this.replicationEndpoint = null;
               ActiveMQServerImpl.stopComponent(replicationEndpoint);
               closeChannelOf(replicationEndpoint);
            }
         } finally {
            silentExecution("Error on live control close", liveControl::close);
         }
      } finally {
         silentExecution("Error on cluster topology listener cleanup", () -> clusterController.removeClusterTopologyListener(replicationObserver));
         silentExecution("Error while removing incoming interceptor for replication", () -> clusterController.removeIncomingInterceptorForReplication(replicationError));
      }
   } finally {
      this.replicationObserver = null;
   }
}
/**
 * Runs {@code task}, suppressing any {@link Throwable} it throws and logging it at debug level
 * with the supplied message. Used for best-effort cleanup steps that must never propagate.
 */
private static void silentExecution(String debugErrorMessage, Runnable task) {
   try {
      task.run();
   } catch (Throwable suppressed) {
      // intentionally swallowed: cleanup failures must not mask the primary outcome
      LOGGER.debug(debugErrorMessage, suppressed);
   }
}
/**
 * Best-effort close of the endpoint's channel (if any), then detaches it from the endpoint.
 * Safe to call with a {@code null} endpoint.
 */
private static void closeChannelOf(final ReplicationEndpoint replicationEndpoint) {
   if (replicationEndpoint == null || replicationEndpoint.getChannel() == null) {
      return;
   }
   silentExecution("Error while closing replication endpoint channel", () -> replicationEndpoint.getChannel().close());
   replicationEndpoint.setChannel(null);
}
/**
 * Convenience overload that always checks (and sets) the {@code stopping} flag before restarting.
 *
 * @return {@code true} if the async stop/restart was scheduled
 */
private boolean asyncRestartServer(final ActiveMQServer server, boolean restart) {
   return asyncRestartServer(server, restart, true);
}
/**
 * Stops {@code server} on a freshly spawned thread and, when {@code restart} is set, starts it
 * again right after. When {@code checkStopping} is set the {@code stopping} flag is claimed
 * atomically first, so only one restart can ever be scheduled.
 *
 * @return {@code false} only when {@code checkStopping} is set and another caller already claimed the flag
 */
private boolean asyncRestartServer(final ActiveMQServer server, boolean restart, boolean checkStopping) {
   if (checkStopping && !stopping.compareAndSet(false, true)) {
      return false;
   }
   new Thread(() -> {
      // nothing to do if the server is already (being) stopped
      if (server.getState() == ActiveMQServer.SERVER_STATE.STOPPED || server.getState() == ActiveMQServer.SERVER_STATE.STOPPING) {
         return;
      }
      try {
         server.stop(!restart);
         if (restart) {
            server.start();
         }
      } catch (Exception e) {
         if (restart) {
            ActiveMQServerLogger.LOGGER.errorRestartingBackupServer(e, server);
         } else {
            ActiveMQServerLogger.LOGGER.errorStoppingServer(e);
         }
      }
   }).start();
   return true;
}
/**
 * Locates the current live broker and attempts a cluster-control connection to it, trying the
 * primary transport first and the backup transport second.
 * <p>
 * On a non-failback attempt the located node ID is adopted as this broker's node ID; on a
 * failback it is asserted to match the expected one.
 *
 * @return the connected {@link ClusterControl}, or {@code null} when no transport could be reached
 * @throws ActiveMQException from the locator
 */
private ClusterControl tryLocateAndConnectToLive(final LiveNodeLocator liveLocator,
                                                 final ClusterController clusterController) throws ActiveMQException {
   liveLocator.locateNode();
   final Pair<TransportConfiguration, TransportConfiguration> possibleLive = liveLocator.getLiveConfiguration();
   final String nodeID = liveLocator.getNodeID();
   if (nodeID == null) {
      throw new RuntimeException("Could not establish the connection with any live");
   }
   if (!policy.isTryFailback()) {
      assert expectedNodeID == null;
      // adopt the live's node ID: this backup replicates that node's data
      activeMQServer.getNodeManager().setNodeID(nodeID);
   } else {
      assert expectedNodeID.equals(nodeID);
   }
   if (possibleLive == null) {
      return null;
   }
   // try the primary (A) transport first, fall back to the backup (B) one
   final ClusterControl liveControl = tryConnectToNodeInReplicatedCluster(clusterController, possibleLive.getA());
   if (liveControl != null) {
      return liveControl;
   }
   return tryConnectToNodeInReplicatedCluster(clusterController, possibleLive.getB());
}
/**
 * Attempts a cluster-control connection over {@code tc}; returns {@code null} when {@code tc}
 * is {@code null} or the connection fails (failure is only logged at debug level).
 */
private static ClusterControl tryConnectToNodeInReplicatedCluster(final ClusterController clusterController,
                                                                  final TransportConfiguration tc) {
   if (tc == null) {
      return null;
   }
   try {
      return clusterController.connectToNodeInReplicatedCluster(tc);
   } catch (Exception e) {
      // best effort: caller will try another transport or give up
      LOGGER.debug(e.getMessage(), e);
   }
   return null;
}
/**
 * Marks this activation closed, closes any in-flight {@link ReplicationObserver}, stops the
 * backup node manager (if still acting as backup) and finally stops the distributed manager.
 *
 * @param permanently unused here; part of the activation contract
 * @param restarting  unused here; part of the activation contract
 */
@Override
public void close(final boolean permanently, final boolean restarting) throws Exception {
   synchronized (this) {
      closed = true;
      final ReplicationObserver replicationObserver = this.replicationObserver;
      if (replicationObserver != null) {
         replicationObserver.close();
      }
   }
   //we have to check as the server policy may have changed
   try {
      if (activeMQServer.getHAPolicy().isBackup()) {
         // To avoid a NPE cause by the stop
         final NodeManager nodeManager = activeMQServer.getNodeManager();
         activeMQServer.interruptActivationThread(nodeManager);
         if (nodeManager != null) {
            nodeManager.stopBackup();
         }
      }
   } finally {
      // this one need to happen after interrupting the activation thread
      // in order to unblock distributedManager::start
      distributedManager.stop();
   }
}
/**
 * Hook invoked before the storage is closed; currently a no-op for this activation.
 */
@Override
public void preStorageClose() throws Exception {
   // TODO replication endpoint close?
}
/**
 * Authorizes against the live, creates and starts a {@link ReplicationEndpoint} wired to the
 * live's replication channel, then announces this broker as a replicating backup.
 * <p>
 * On any failure the partially-built endpoint is stopped, its channel closed, and {@code null}
 * is returned so the caller can report a registration error.
 */
private ReplicationEndpoint tryAuthorizeAndAsyncRegisterAsBackupToLive(final ClusterControl liveControl,
                                                                       final ReplicationObserver liveObserver) {
   ReplicationEndpoint replicationEndpoint = null;
   try {
      liveControl.getSessionFactory().setReconnectAttempts(1);
      // observer must see connection failures of the control session too
      liveObserver.listenConnectionFailuresOf(liveControl.getSessionFactory());
      liveControl.authorize();
      replicationEndpoint = new ReplicationEndpoint(activeMQServer, policy.isTryFailback(), liveObserver);
      // test hook: lets tests spy on the endpoint right after creation
      final Consumer<ReplicationEndpoint> onReplicationEndpointCreation = this.onReplicationEndpointCreation;
      if (onReplicationEndpointCreation != null) {
         onReplicationEndpointCreation.accept(replicationEndpoint);
      }
      replicationEndpoint.setExecutor(activeMQServer.getExecutorFactory().getExecutor());
      connectToReplicationEndpoint(liveControl, replicationEndpoint);
      replicationEndpoint.start();
      liveControl.announceReplicatingBackupToLive(policy.isTryFailback(), policy.getClusterName());
      return replicationEndpoint;
   } catch (Exception e) {
      ActiveMQServerLogger.LOGGER.replicationStartProblem(e);
      // undo any partial setup before reporting failure
      ActiveMQServerImpl.stopComponent(replicationEndpoint);
      closeChannelOf(replicationEndpoint);
      return null;
   }
}
/**
 * Creates the replication channel on the live control connection and wires it to the endpoint.
 * The handler is installed before the channel is handed to the endpoint.
 *
 * @return always {@code true} (kept for call-site symmetry)
 */
private static boolean connectToReplicationEndpoint(final ClusterControl liveControl,
                                                    final ReplicationEndpoint replicationEndpoint) {
   final Channel replicationChannel = liveControl.createReplicationChannel();
   replicationChannel.setHandler(replicationEndpoint);
   replicationEndpoint.setChannel(replicationChannel);
   return true;
}
/**
 * Monitoring-only snapshot of the backup sync state.
 * <p>
 * NOTE: not suitable to drive logic — during a failover there is no active observer and this
 * reports {@code false}.
 */
@Override
public boolean isReplicaSync() {
   final ReplicationObserver observer = this.replicationObserver;
   return observer != null && observer.isBackupUpToDate();
}
/**
 * @return the in-flight {@link ReplicationEndpoint}, or {@code null} outside a replication attempt
 */
public ReplicationEndpoint getReplicationEndpoint() {
   return replicationEndpoint;
}
/**
 * Registers a spy that is invoked with each {@link ReplicationEndpoint} right after creation.
 * This must be used just for testing purposes.
 *
 * @param onReplicationEndpointCreation non-null callback
 */
public void spyReplicationEndpointCreation(Consumer<ReplicationEndpoint> onReplicationEndpointCreation) {
   // requireNonNull returns its argument, so validate-and-assign in one step
   this.onReplicationEndpointCreation = Objects.requireNonNull(onReplicationEndpointCreation);
}
}

View File

@ -0,0 +1,332 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.server.impl;
import javax.annotation.concurrent.GuardedBy;
import java.util.Objects;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.api.core.client.ClusterTopologyListener;
import org.apache.activemq.artemis.api.core.client.SessionFailureListener;
import org.apache.activemq.artemis.api.core.client.TopologyMember;
import org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal;
import org.apache.activemq.artemis.core.protocol.core.CoreRemotingConnection;
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.server.LiveNodeLocator.BackupRegistrationListener;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.core.server.cluster.BackupManager;
import org.jboss.logging.Logger;
/**
 * Observes a single replication attempt of a backup against its live broker and resolves exactly
 * one terminal {@link ReplicationFailure} into {@link #replicationFailure}.
 * <p>
 * Thread-safety: terminal transitions are guarded by {@code synchronized (this)}; hot-path reads
 * use the volatile fields {@code liveID}, {@code backupUpToDate} and {@code closed} as fast-path
 * checks before taking the lock.
 */
final class ReplicationObserver implements ClusterTopologyListener, SessionFailureListener, BackupRegistrationListener, ReplicationEndpoint.ReplicationEndpointEventListener, AutoCloseable {

   private static final Logger LOGGER = Logger.getLogger(ReplicationObserver.class);

   /** The ways a replication attempt can terminate. */
   public enum ReplicationFailure {
      VoluntaryFailOver, BackupNotInSync, NonVoluntaryFailover, RegistrationError, AlreadyReplicating, ClosedObserver, WrongNodeId;
   }

   private final NodeManager nodeManager;
   private final BackupManager backupManager;
   private final ScheduledExecutorService scheduledPool;
   // true while handling a failback: nodeDown events are ignored (see nodeDown)
   private final boolean failback;
   // on failback, the only live node ID we accept; null otherwise
   private final String expectedNodeID;
   // completes exactly once with the terminal outcome of this attempt
   private final CompletableFuture<ReplicationFailure> replicationFailure;

   @GuardedBy("this")
   private ClientSessionFactoryInternal sessionFactory;
   @GuardedBy("this")
   private CoreRemotingConnection connection;
   @GuardedBy("this")
   private ScheduledFuture<?> forcedFailover;

   private volatile String liveID;
   private volatile boolean backupUpToDate;
   private volatile boolean closed;

   /**
    * This is a safety net in case the live sends the first {@link ReplicationLiveIsStoppingMessage}
    * with code {@link org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage.LiveStopping#STOP_CALLED} and crashes before sending the second with
    * {@link org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage.LiveStopping#FAIL_OVER}.
    * <p>
    * If the second message does come within this dead line, we fail over anyway.
    */
   public static final int WAIT_TIME_AFTER_FIRST_LIVE_STOPPING_MSG = 60;

   private ReplicationObserver(final NodeManager nodeManager,
                               final BackupManager backupManager,
                               final ScheduledExecutorService scheduledPool,
                               final boolean failback,
                               final String expectedNodeID) {
      this.nodeManager = nodeManager;
      this.backupManager = backupManager;
      this.scheduledPool = scheduledPool;
      this.failback = failback;
      this.expectedNodeID = expectedNodeID;
      this.replicationFailure = new CompletableFuture<>();
      this.sessionFactory = null;
      this.connection = null;
      this.forcedFailover = null;
      this.liveID = null;
      this.backupUpToDate = false;
      this.closed = false;
   }

   /** Creates an observer for a failback attempt: the live must carry {@code expectedNodeID}. */
   public static ReplicationObserver failbackObserver(final NodeManager nodeManager,
                                                      final BackupManager backupManager,
                                                      final ScheduledExecutorService scheduledPool,
                                                      final String expectedNodeID) {
      Objects.requireNonNull(expectedNodeID);
      return new ReplicationObserver(nodeManager, backupManager, scheduledPool, true, expectedNodeID);
   }

   /** Creates an observer for a plain failover attempt (no expected node ID). */
   public static ReplicationObserver failoverObserver(final NodeManager nodeManager,
                                                      final BackupManager backupManager,
                                                      final ScheduledExecutorService scheduledPool) {
      return new ReplicationObserver(nodeManager, backupManager, scheduledPool, false, null);
   }

   /**
    * Resolves the terminal outcome after the live is gone: {@code BackupNotInSync} when initial
    * sync never completed, otherwise voluntary/non-voluntary failover.
    */
   private void onLiveDown(boolean voluntaryFailover) {
      // lock-free fast path; re-checked under the lock below
      if (closed || replicationFailure.isDone()) {
         return;
      }
      synchronized (this) {
         if (closed || replicationFailure.isDone()) {
            return;
         }
         stopForcedFailoverAfterDelay();
         unlistenConnectionFailures();
         if (!isRemoteBackupUpToDate()) {
            replicationFailure.complete(ReplicationFailure.BackupNotInSync);
         } else if (voluntaryFailover) {
            replicationFailure.complete(ReplicationFailure.VoluntaryFailOver);
         } else {
            replicationFailure.complete(ReplicationFailure.NonVoluntaryFailover);
         }
      }
   }

   @Override
   public void nodeDown(long eventUID, String nodeID) {
      // ignore it during a failback:
      // a failing slave closes all connections but the one used for replication,
      // triggering a nodeDown before the restarted master receives a STOP_CALLED from it.
      // This can make the master fire a useless quorum vote during a normal failback.
      if (failback) {
         return;
      }
      if (nodeID.equals(liveID)) {
         onLiveDown(false);
      }
   }

   @Override
   public void nodeUP(TopologyMember member, boolean last) {
   }

   /**
    * if the connection to our replicated live goes down then decide on an action
    */
   @Override
   public void connectionFailed(ActiveMQException exception, boolean failedOver) {
      onLiveDown(false);
   }

   @Override
   public void connectionFailed(final ActiveMQException me, boolean failedOver, String scaleDownTargetNodeID) {
      connectionFailed(me, failedOver);
   }

   @Override
   public void beforeReconnect(ActiveMQException exception) {
      //noop
   }

   /** Idempotent close: completes the outcome with {@code ClosedObserver} exactly once. */
   @Override
   public void close() {
      if (closed) {
         return;
      }
      synchronized (this) {
         if (closed) {
            return;
         }
         unlistenConnectionFailures();
         closed = true;
         replicationFailure.complete(ReplicationFailure.ClosedObserver);
      }
   }

   /**
    * @param liveSessionFactory the session factory used to connect to the live server
    */
   public synchronized void listenConnectionFailuresOf(final ClientSessionFactoryInternal liveSessionFactory) {
      if (closed) {
         throw new IllegalStateException("the observer is closed: cannot listen to any failures");
      }
      if (sessionFactory != null || connection != null) {
         throw new IllegalStateException("this observer is already listening to other session factory failures");
      }
      this.sessionFactory = liveSessionFactory;
      //belts and braces, there are circumstances where the connection listener doesn't get called but the session does.
      this.sessionFactory.addFailureListener(this);
      connection = (CoreRemotingConnection) liveSessionFactory.getConnection();
      connection.addFailureListener(this);
   }

   /** Detaches this observer from the session factory/connection it was listening to, if any. */
   public synchronized void unlistenConnectionFailures() {
      if (connection != null) {
         connection.removeFailureListener(this);
         connection = null;
      }
      if (sessionFactory != null) {
         sessionFactory.removeFailureListener(this);
         sessionFactory = null;
      }
   }

   @Override
   public void onBackupRegistrationFailed(boolean alreadyReplicating) {
      if (closed || replicationFailure.isDone()) {
         return;
      }
      synchronized (this) {
         if (closed || replicationFailure.isDone()) {
            return;
         }
         stopForcedFailoverAfterDelay();
         unlistenConnectionFailures();
         replicationFailure.complete(alreadyReplicating ? ReplicationFailure.AlreadyReplicating : ReplicationFailure.RegistrationError);
      }
   }

   /**
    * Blocks until this attempt terminates and returns its outcome.
    * Interruption is mapped to {@code ClosedObserver} with the thread's interrupt status restored.
    */
   public ReplicationFailure awaitReplicationFailure() {
      try {
         return replicationFailure.get();
      } catch (InterruptedException e) {
         // FIX: don't swallow the interrupt — restore the flag for the caller
         Thread.currentThread().interrupt();
         return ReplicationFailure.ClosedObserver;
      } catch (Throwable e) {
         return ReplicationFailure.ClosedObserver;
      }
   }

   /** Arms the safety-net failover described on {@link #WAIT_TIME_AFTER_FIRST_LIVE_STOPPING_MSG}. */
   private synchronized void scheduleForcedFailoverAfterDelay() {
      if (forcedFailover != null) {
         return;
      }
      forcedFailover = scheduledPool.schedule(() -> onLiveDown(false), WAIT_TIME_AFTER_FIRST_LIVE_STOPPING_MSG, TimeUnit.SECONDS);
   }

   /** Cancels the safety-net failover, if armed. */
   private synchronized void stopForcedFailoverAfterDelay() {
      if (forcedFailover == null) {
         return;
      }
      forcedFailover.cancel(false);
      forcedFailover = null;
   }

   @Override
   public void onRemoteBackupUpToDate() {
      if (backupUpToDate || closed || replicationFailure.isDone()) {
         return;
      }
      synchronized (this) {
         if (backupUpToDate || closed || replicationFailure.isDone()) {
            return;
         }
         // the live must have communicated its node ID before declaring us in sync
         assert liveID != null;
         backupManager.announceBackup();
         backupUpToDate = true;
      }
   }

   public boolean isBackupUpToDate() {
      return backupUpToDate;
   }

   public String getLiveID() {
      return liveID;
   }

   /**
    * A node ID is acceptable when it matches the already-known live ID, or — if none is known
    * yet — when we are not failing back, or it matches the expected failback node ID.
    */
   private boolean validateNodeId(String nodeID) {
      if (nodeID == null) {
         return false;
      }
      final String existingNodeId = this.liveID;
      if (existingNodeId == null) {
         if (!failback) {
            return true;
         }
         return nodeID.equals(expectedNodeID);
      }
      return existingNodeId.equals(nodeID);
   }

   @Override
   public void onLiveNodeId(String nodeId) {
      if (closed || replicationFailure.isDone()) {
         return;
      }
      final String existingNodeId = this.liveID;
      if (existingNodeId != null && existingNodeId.equals(nodeId)) {
         // same live as before: nothing to do
         return;
      }
      synchronized (this) {
         if (closed || replicationFailure.isDone()) {
            return;
         }
         if (!validateNodeId(nodeId)) {
            stopForcedFailoverAfterDelay();
            unlistenConnectionFailures();
            replicationFailure.complete(ReplicationFailure.WrongNodeId);
         } else if (liveID == null) {
            liveID = nodeId;
            nodeManager.setNodeID(nodeId);
         }
      }
   }

   public boolean isRemoteBackupUpToDate() {
      return backupUpToDate;
   }

   @Override
   public void onLiveStopping(ReplicationLiveIsStoppingMessage.LiveStopping finalMessage) {
      if (closed || replicationFailure.isDone()) {
         return;
      }
      synchronized (this) {
         if (closed || replicationFailure.isDone()) {
            return;
         }
         switch (finalMessage) {
            case STOP_CALLED:
               // first message: arm the safety net in case FAIL_OVER never arrives
               scheduleForcedFailoverAfterDelay();
               break;
            case FAIL_OVER:
               onLiveDown(true);
               break;
            default:
               LOGGER.errorf("unsupported LiveStopping type: %s", finalMessage);
         }
      }
   }
}

View File

@ -0,0 +1,430 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.server.impl;
import javax.annotation.concurrent.GuardedBy;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.activemq.artemis.api.core.ActiveMQAlreadyReplicatingException;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.api.core.ActiveMQIllegalStateException;
import org.apache.activemq.artemis.api.core.Pair;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.core.persistence.StorageManager;
import org.apache.activemq.artemis.core.protocol.core.Channel;
import org.apache.activemq.artemis.core.protocol.core.ChannelHandler;
import org.apache.activemq.artemis.core.protocol.core.CoreRemotingConnection;
import org.apache.activemq.artemis.core.protocol.core.impl.PacketImpl;
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.BackupRegistrationMessage;
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.BackupReplicationStartFailedMessage;
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage;
import org.apache.activemq.artemis.core.remoting.CloseListener;
import org.apache.activemq.artemis.core.remoting.FailureListener;
import org.apache.activemq.artemis.core.remoting.server.RemotingService;
import org.apache.activemq.artemis.core.replication.ReplicationManager;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.core.server.cluster.ClusterConnection;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.apache.activemq.artemis.spi.core.remoting.Acceptor;
import org.jboss.logging.Logger;
import static org.apache.activemq.artemis.core.server.ActiveMQServer.SERVER_STATE.STARTED;
import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.awaitNextCommittedActivationSequence;
import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.ensureSequentialAccessToNodeData;
import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.tryActivate;
/**
* This is going to be {@link #run()} just by natural born primary, at the first start.
* Both during a failover or a failback, {@link #run()} isn't going to be used, but only {@link #getActivationChannelHandler(Channel, Acceptor)}.
*/
public class ReplicationPrimaryActivation extends LiveActivation implements DistributedLock.UnavailableLockListener {
private static final Logger LOGGER = Logger.getLogger(ReplicationPrimaryActivation.class);

// This is the time we expect a replica to become a live from the quorum pov
// ie time to execute tryActivate and ensureSequentialAccessToNodeData
private static final long FAILBACK_TIMEOUT_MILLIS = 4_000;

private final ReplicationPrimaryPolicy policy;

private final ActiveMQServerImpl activeMQServer;

// set while a backup is replicating from this live; cleared when replication ends
@GuardedBy("replicationLock")
private ReplicationManager replicationManager;

private final Object replicationLock;

private final DistributedPrimitiveManager distributedManager;

// claimed exactly once by whoever stops/restarts this server
private final AtomicBoolean stoppingServer;

/**
 * @param activeMQServer     the server this activation drives
 * @param distributedManager quorum manager used to acquire/track the live lock
 * @param policy             the primary replication policy in force
 */
public ReplicationPrimaryActivation(final ActiveMQServerImpl activeMQServer,
                                    final DistributedPrimitiveManager distributedManager,
                                    final ReplicationPrimaryPolicy policy) {
   this.activeMQServer = activeMQServer;
   this.policy = policy;
   this.replicationLock = new Object();
   this.distributedManager = distributedManager;
   this.stoppingServer = new AtomicBoolean();
}
/**
 * Exposes the quorum manager; used for testing purposes only.
 */
public DistributedPrimitiveManager getDistributedManager() {
   return distributedManager;
}
/**
 * Freezes all remoting connections except the one carrying replication traffic to the backup
 * (when replication is active). No-op when {@code remotingService} is {@code null}.
 */
@Override
public void freezeConnections(RemotingService remotingService) {
   final ReplicationManager manager = getReplicationManager();
   if (remotingService == null) {
      return;
   }
   remotingService.freeze(null, manager == null ? null : manager.getBackupTransportConnection());
}
/**
 * First-start activation of a natural-born primary: acquires the live lock via the quorum
 * manager, bumps the activation sequence, initialises the server and completes activation.
 * Failing to win the lock demotes this broker to its backup policy.
 */
@Override
public void run() {
   try {
      // we have a common nodeId that we can share and coordinate with between peers
      if (policy.getCoordinationId() != null) {
         LOGGER.infof("Applying shared peer NodeID=%s to enable coordinated live activation", policy.getCoordinationId());
         // REVISIT: this is quite clunky, also in backup activation, we just need new nodeID persisted!
         activeMQServer.resetNodeManager();
         activeMQServer.getNodeManager().start();
         activeMQServer.getNodeManager().setNodeID(policy.getCoordinationId());
         activeMQServer.getNodeManager().stopBackup();
      }
      final long nodeActivationSequence = activeMQServer.getNodeManager().readNodeActivationSequence();
      final String nodeId = activeMQServer.getNodeManager().readNodeId().toString();
      DistributedLock liveLock;
      // retry until the quorum manager is available and tryActivate yields a decision
      while (true) {
         distributedManager.start();
         try {
            liveLock = tryActivate(nodeId, nodeActivationSequence, distributedManager, LOGGER);
            break;
         } catch (UnavailableStateException canRecoverEx) {
            distributedManager.stop();
         }
      }
      if (liveLock == null) {
         // lost the race to become live: restart as backup
         distributedManager.stop();
         LOGGER.infof("This broker cannot become a live server with NodeID = %s: restarting as backup", nodeId);
         activeMQServer.setHAPolicy(policy.getBackupPolicy());
         return;
      }
      ensureSequentialAccessToNodeData(activeMQServer, distributedManager, LOGGER);
      activeMQServer.initialisePart1(false);
      activeMQServer.initialisePart2(false);
      // must be registered before checking the caller
      liveLock.addListener(this);
      // This control is placed here because initialisePart2 is going to load the journal that
      // could pause the JVM for enough time to lose lock ownership
      if (!liveLock.isHeldByCaller()) {
         throw new IllegalStateException("This broker isn't live anymore, probably due to application pauses eg GC, OS etc: failing now");
      }
      activeMQServer.completeActivation(true);
      if (activeMQServer.getIdentity() != null) {
         ActiveMQServerLogger.LOGGER.serverIsLive(activeMQServer.getIdentity());
      } else {
         ActiveMQServerLogger.LOGGER.serverIsLive();
      }
   } catch (Exception e) {
      // async stop it, we don't need to await this to complete
      distributedManager.stop();
      ActiveMQServerLogger.LOGGER.initializationError(e);
      activeMQServer.callActivationFailureListeners(e);
   }
}
/**
 * Returns the packet handler accepting backup registrations, or {@code null} once the server
 * is stopping (so no new backup can register during shutdown).
 */
@Override
public ChannelHandler getActivationChannelHandler(final Channel channel, final Acceptor acceptorUsed) {
   if (stoppingServer.get()) {
      return null;
   }
   return packet -> {
      if (packet.getType() == PacketImpl.BACKUP_REGISTRATION) {
         onBackupRegistration(channel, acceptorUsed, (BackupRegistrationMessage) packet);
      }
   };
}
/**
 * Handles a backup registration request: kicks off async replication, replying with a typed
 * failure packet when replication is already running or cannot start.
 */
private void onBackupRegistration(final Channel channel,
                                  final Acceptor acceptorUsed,
                                  final BackupRegistrationMessage msg) {
   try {
      startAsyncReplication(channel.getConnection(), acceptorUsed.getClusterConnection(), msg.getConnector(), msg.isFailBackRequest());
   } catch (ActiveMQAlreadyReplicatingException are) {
      channel.send(new BackupReplicationStartFailedMessage(BackupReplicationStartFailedMessage.BackupRegistrationProblem.ALREADY_REPLICATING));
   } catch (ActiveMQException e) {
      LOGGER.debug("Failed to process backup registration packet", e);
      channel.send(new BackupReplicationStartFailedMessage(BackupReplicationStartFailedMessage.BackupRegistrationProblem.EXCEPTION));
   }
}
/**
 * Installs a {@link ReplicationManager} for the registering backup (single backup at a time)
 * and starts the actual data replication on a dedicated thread.
 *
 * @throws ActiveMQAlreadyReplicatingException when a replication is already in progress
 * @throws ActiveMQIllegalStateException      when the server is no longer started
 */
private void startAsyncReplication(final CoreRemotingConnection remotingConnection,
                                   final ClusterConnection clusterConnection,
                                   final TransportConfiguration backupTransport,
                                   final boolean isFailBackRequest) throws ActiveMQException {
   synchronized (replicationLock) {
      if (replicationManager != null) {
         throw new ActiveMQAlreadyReplicatingException();
      }
      if (!activeMQServer.isStarted()) {
         throw new ActiveMQIllegalStateException();
      }
      // react to the replication connection dropping/closing
      final ReplicationFailureListener listener = new ReplicationFailureListener();
      remotingConnection.addCloseListener(listener);
      remotingConnection.addFailureListener(listener);
      final ReplicationManager replicationManager = new ReplicationManager(activeMQServer, remotingConnection, clusterConnection.getCallTimeout(), policy.getInitialReplicationSyncTimeout(), activeMQServer.getIOExecutorFactory());
      this.replicationManager = replicationManager;
      replicationManager.start();
      // the actual sync happens off this thread to avoid blocking the packet handler
      final Thread replicatingThread = new Thread(() -> replicate(replicationManager, clusterConnection, isFailBackRequest, backupTransport));
      replicatingThread.setName("async-replication-thread");
      replicatingThread.start();
   }
}
/**
 * Body of the async-replication thread: starts journal replication toward the backup, announces
 * this node to the cluster and, on an auto-failback request, waits for the backup announcement
 * before handing liveness back.
 */
private void replicate(final ReplicationManager replicationManager,
                       final ClusterConnection clusterConnection,
                       final boolean isFailBackRequest,
                       final TransportConfiguration backupTransport) {
   try {
      final String nodeID = activeMQServer.getNodeID().toString();
      activeMQServer.getStorageManager().startReplication(replicationManager, activeMQServer.getPagingManager(), nodeID, isFailBackRequest && policy.isAllowAutoFailBack(), policy.getInitialReplicationSyncTimeout());

      clusterConnection.nodeAnnounced(System.currentTimeMillis(), nodeID, policy.getGroupName(), policy.getScaleDownGroupName(), new Pair<>(null, backupTransport), true);

      if (isFailBackRequest && policy.isAllowAutoFailBack()) {
         awaitBackupAnnouncementOnFailbackRequest(clusterConnection);
      }
   } catch (Exception e) {
      if (activeMQServer.getState() == STARTED) {
         /*
          * The reasoning here is that the exception was either caused by (1) the
          * (interaction with) the backup, or (2) by an IO Error at the storage. If (1), we
          * can swallow the exception and ignore the replication request. If (2) the live
          * will crash shortly.
          */
         ActiveMQServerLogger.LOGGER.errorStartingReplication(e);
      }
      try {
         ActiveMQServerImpl.stopComponent(replicationManager);
      } catch (Exception amqe) {
         ActiveMQServerLogger.LOGGER.errorStoppingReplication(amqe);
      } finally {
         synchronized (replicationLock) {
            // only clear the field if it still refers to this attempt's manager
            if (this.replicationManager == replicationManager) {
               this.replicationManager = null;
            }
         }
      }
   }
}
/**
 * Awaits the backup's topology announcement before attempting a failback.
 * This broker is a backup broker acting as a live, ready to restart as a backup once the
 * announcement arrives; if it never does, the failback is abandoned with a warning.
 */
private void awaitBackupAnnouncementOnFailbackRequest(ClusterConnection clusterConnection) throws Exception {
   final String nodeID = activeMQServer.getNodeID().toString();
   final BackupTopologyListener topologyListener = new BackupTopologyListener(nodeID, clusterConnection.getConnector());
   clusterConnection.addClusterTopologyListener(topologyListener);
   try {
      if (topologyListener.waitForBackup()) {
         restartAsBackupAfterFailback();
      } else {
         ActiveMQServerLogger.LOGGER.failbackMissedBackupAnnouncement();
      }
   } finally {
      clusterConnection.removeClusterTopologyListener(topologyListener);
   }
}
/**
 * Completes an auto-failback: fails this live over to the in-sync replica, waits (bounded) for
 * the replica to commit a newer activation sequence, then restarts this broker as a backup.
 * Claims the {@code stoppingServer} flag so no concurrent stop path can interleave.
 */
private void restartAsBackupAfterFailback() throws Exception {
   if (stoppingServer.get()) {
      return;
   }
   final String coordinatedLockAndNodeId;
   final long inSyncReplicaActivation;
   synchronized (replicationLock) {
      if (stoppingServer.get()) {
         return;
      }
      final ReplicationManager replicationManager = this.replicationManager;
      if (replicationManager == null) {
         LOGGER.warnf("Failback interrupted");
         // we got a disconnection from the replica *before* stopping acceptors: better not failback!
         return;
      }
      // IMPORTANT: this is going to save server::fail to issue a replica connection failure (with failed == false)
      // because onReplicationConnectionClose fail-fast on stopping == true.
      if (!stoppingServer.compareAndSet(false, true)) {
         LOGGER.infof("Failback interrupted: server is already stopping");
         return;
      }
      coordinatedLockAndNodeId = activeMQServer.getNodeManager().getNodeId().toString();
      inSyncReplicaActivation = activeMQServer.getNodeManager().getNodeActivationSequence();
      // none can notice a concurrent drop of replica connection here: awaitNextCommittedActivationSequence defensively
      // wait FAILBACK_TIMEOUT_MILLIS, proceed as backup and compete to become live again
      activeMQServer.fail(true);
   }
   try {
      distributedManager.start();
      if (!awaitNextCommittedActivationSequence(distributedManager, coordinatedLockAndNodeId, inSyncReplicaActivation, FAILBACK_TIMEOUT_MILLIS, LOGGER)) {
         LOGGER.warnf("Timed out waiting for failback server activation with NodeID = %s: and sequence > %d: after %dms",
                      coordinatedLockAndNodeId, inSyncReplicaActivation, FAILBACK_TIMEOUT_MILLIS);
      }
   } catch (UnavailableStateException ignored) {
      LOGGER.debug("Unavailable distributed manager while awaiting failback activation sequence: ignored", ignored);
   } finally {
      distributedManager.stop();
   }
   ActiveMQServerLogger.LOGGER.restartingReplicatedBackupAfterFailback();
   activeMQServer.setHAPolicy(policy.getBackupPolicy());
   activeMQServer.start();
}
/**
 * Requests an asynchronous stop of the broker, guaranteed to run at most once:
 * only the caller that flips {@code stoppingServer} from {@code false} to {@code true}
 * spawns the stopping thread. Stopping on a separate thread avoids deadlocking the
 * (listener/quorum) thread that detected the failure.
 */
private void asyncStopServer() {
   // the CAS alone decides the single winner; losers return immediately
   if (!stoppingServer.compareAndSet(false, true)) {
      return;
   }
   final Thread stopperThread = new Thread(() -> {
      try {
         activeMQServer.stop();
      } catch (Exception e) {
         ActiveMQServerLogger.LOGGER.errorRestartingBackupServer(e, activeMQServer);
      }
   });
   stopperThread.start();
}
/**
 * Quorum callback: the distributed lock backing this live broker became unavailable,
 * so exclusive ownership of the journal can no longer be proven and the broker must stop.
 */
@Override
public void onUnavailableLockEvent() {
   LOGGER.error("Quorum UNAVAILABLE: async stopping broker.");
   asyncStopServer();
}
/**
 * Funnels every failure/close event of the replication connection into
 * {@link #onReplicationConnectionClose()}: from the live broker's point of view
 * the reaction to a failed or cleanly-closed replica channel is the same.
 */
private final class ReplicationFailureListener implements FailureListener, CloseListener {

   @Override
   public void connectionFailed(ActiveMQException exception, boolean failedOver) {
      onReplicationConnectionClose();
   }

   @Override
   public void connectionFailed(final ActiveMQException me, boolean failedOver, String scaleDownTargetNodeID) {
      // scale-down target is irrelevant here: delegate to the plain failure handler
      connectionFailed(me, failedOver);
   }

   @Override
   public void connectionClosed() {
      onReplicationConnectionClose();
   }
}
/**
 * Handles the loss of the replication connection: if the broker is staying alive it
 * bumps the coordinated activation sequence (re-asserting exclusive data ownership),
 * then stops replication and clears {@code replicationManager}.
 * <p>
 * The work is pushed onto the server thread pool because this is called from network
 * failure/close listeners; everything runs under {@code replicationLock} so it cannot
 * interleave with failback or a concurrent close.
 */
private void onReplicationConnectionClose() {
   ExecutorService executorService = activeMQServer.getThreadPool();
   if (executorService != null) {
      // fail-fast while stopping: restartAsBackupAfterFailback relies on this guard
      if (stoppingServer.get()) {
         return;
      }
      executorService.execute(() -> {
         synchronized (replicationLock) {
            // already torn down by a concurrent close()/failback: nothing to do
            if (replicationManager == null) {
               return;
            }
            // we increment only if we are staying alive
            if (!stoppingServer.get() && STARTED.equals(activeMQServer.getState())) {
               try {
                  ensureSequentialAccessToNodeData(activeMQServer, distributedManager, LOGGER);
               } catch (Throwable fatal) {
                  // cannot prove exclusive ownership of the data anymore: stop the broker
                  LOGGER.errorf(fatal, "Unexpected exception: %s on attempted activation sequence increment; stopping server async", fatal.getLocalizedMessage());
                  asyncStopServer();
               }
            }
            // this is going to stop the replication manager
            final StorageManager storageManager = activeMQServer.getStorageManager();
            if (storageManager != null) {
               storageManager.stopReplication();
            }
            replicationManager = null;
         }
      });
   }
}
/**
 * Shuts down this activation: drops the replication manager reference, stops the
 * distributed manager and then either crashes (permanent stop) or pauses the live
 * server in the node manager, which controls how the node is treated on restart.
 *
 * @param permanently {@code true} to mark the live server crashed, {@code false} to pause it
 * @param restarting  unused here; part of the {@code Activation#close} contract
 */
@Override
public void close(boolean permanently, boolean restarting) throws Exception {
   synchronized (replicationLock) {
      replicationManager = null;
   }
   distributedManager.stop();
   // guard against an NPE: a concurrent stop may have already cleared the node manager
   final NodeManager nodeManager = activeMQServer.getNodeManager();
   if (nodeManager != null) {
      if (permanently) {
         nodeManager.crashLiveServer();
      } else {
         nodeManager.pauseLiveServer();
      }
   }
}
/**
 * Notifies the replica that this live broker is stopping (STOP_CALLED), then schedules
 * the replication tokens to be cleared after 30s so waiters are eventually released.
 * No-op when there is no active replication.
 */
@Override
public void sendLiveIsStopping() {
   final ReplicationManager replicationManager = getReplicationManager();
   if (replicationManager == null) {
      // not replicating: nobody to notify
      return;
   }
   replicationManager.sendLiveIsStopping(ReplicationLiveIsStoppingMessage.LiveStopping.STOP_CALLED);
   // this pool gets a 'hard' shutdown, no need to manage the Future of this Runnable.
   activeMQServer.getScheduledPool().schedule(replicationManager::clearReplicationTokens, 30, TimeUnit.SECONDS);
}
/**
 * Returns the current {@link ReplicationManager}, or {@code null} when no replication
 * is in progress. Reads under {@code replicationLock} so the value is consistent with
 * concurrent replication start/teardown.
 */
@Override
public ReplicationManager getReplicationManager() {
   synchronized (replicationLock) {
      return replicationManager;
   }
}
/**
 * A replica is considered in sync when replication is currently running and the
 * initial synchronization phase has completed; with no replication at all this
 * reports {@code false}.
 */
@Override
public boolean isReplicaSync() {
   final ReplicationManager manager = getReplicationManager();
   return manager != null && !manager.isSynchronizing();
}
}

View File

@ -32,6 +32,7 @@ import org.apache.activemq.artemis.core.postoffice.PostOffice;
import org.apache.activemq.artemis.core.protocol.core.Channel;
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint.ReplicationEndpointEventListener;
import org.apache.activemq.artemis.core.server.ActivationParams;
import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
@ -54,7 +55,7 @@ import static org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothi
import static org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum.BACKUP_ACTIVATION.FAIL_OVER;
import static org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum.BACKUP_ACTIVATION.STOP;
public final class SharedNothingBackupActivation extends Activation {
public final class SharedNothingBackupActivation extends Activation implements ReplicationEndpointEventListener {
private static final Logger logger = Logger.getLogger(SharedNothingBackupActivation.class);
@ -96,7 +97,7 @@ public final class SharedNothingBackupActivation extends Activation {
assert replicationEndpoint == null;
activeMQServer.resetNodeManager();
backupUpToDate = false;
replicationEndpoint = new ReplicationEndpoint(activeMQServer, ioCriticalErrorListener, attemptFailBack, this);
replicationEndpoint = new ReplicationEndpoint(activeMQServer, attemptFailBack, this);
}
@Override
@ -156,9 +157,6 @@ public final class SharedNothingBackupActivation extends Activation {
logger.debug("Starting backup manager");
activeMQServer.getBackupManager().start();
logger.debug("Set backup Quorum");
replicationEndpoint.setBackupQuorum(backupQuorum);
replicationEndpoint.setExecutor(activeMQServer.getExecutorFactory().getExecutor());
EndpointConnector endpointConnector = new EndpointConnector();
@ -461,7 +459,13 @@ public final class SharedNothingBackupActivation extends Activation {
return backupUpToDate;
}
public void setRemoteBackupUpToDate() {
@Override
public void onLiveNodeId(String nodeId) {
backupQuorum.liveIDSet(nodeId);
}
@Override
public void onRemoteBackupUpToDate() {
activeMQServer.getBackupManager().announceBackup();
backupUpToDate = true;
backupSyncLatch.countDown();
@ -470,7 +474,8 @@ public final class SharedNothingBackupActivation extends Activation {
/**
* @throws ActiveMQException
*/
public void remoteFailOver(ReplicationLiveIsStoppingMessage.LiveStopping finalMessage) throws ActiveMQException {
@Override
public void onLiveStopping(ReplicationLiveIsStoppingMessage.LiveStopping finalMessage) throws ActiveMQException {
if (logger.isTraceEnabled()) {
logger.trace("Remote fail-over, got message=" + finalMessage + ", backupUpToDate=" +
backupUpToDate);
@ -526,4 +531,9 @@ public final class SharedNothingBackupActivation extends Activation {
return replicationEndpoint;
}
}
@Override
public boolean isReplicaSync() {
return isRemoteBackupUpToDate();
}
}

View File

@ -462,4 +462,13 @@ public class SharedNothingLiveActivation extends LiveActivation {
private TransportConfiguration[] connectorNameListToArray(final List<String> connectorNames) {
return activeMQServer.getConfiguration().getTransportConfigurations(connectorNames);
}
/**
 * @return {@code true} when replication is active and the remote backup has finished
 *         its initial synchronization; {@code false} when there is no replication at all
 */
@Override
public boolean isReplicaSync() {
   final ReplicationManager replicationManager = getReplicationManager();
   if (replicationManager == null) {
      // not replicating: there is no backup that could be in sync
      return false;
   }
   return !replicationManager.isSynchronizing();
}
}

View File

@ -0,0 +1,312 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.server.impl.quorum;
import java.util.Objects;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.MutableLong;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.jboss.logging.Logger;
/**
* This class contains the activation sequence logic of the pluggable quorum vote:
* it should be used by {@link org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation}
* and {@link org.apache.activemq.artemis.core.server.impl.ReplicationPrimaryActivation} to coordinate
* for replication.
*/
/**
 * This class contains the activation sequence logic of the pluggable quorum vote:
 * it should be used by {@link org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation}
 * and {@link org.apache.activemq.artemis.core.server.impl.ReplicationPrimaryActivation} to coordinate
 * for replication.
 * <p>
 * The coordinated activation sequence encoding: a positive value is a committed sequence,
 * a negative value {@code -n} is a *claimed* (not yet committed) sequence {@code n}.
 */
public final class ActivationSequenceStateMachine {

   // polling period while waiting for the coordinated activation sequence to move
   private static final long CHECK_ACTIVATION_SEQUENCE_WAIT_MILLIS = 200;
   // polling period while waiting for another broker to repair a claimed (uncommitted) sequence
   private static final long CHECK_REPAIRED_ACTIVATION_SEQUENCE_WAIT_MILLIS = 2000;
   // bound of each live-lock acquisition attempt before re-validating the sequence
   private static final long LIVE_LOCK_ACQUIRE_TIMEOUT_MILLIS = 2000;

   private ActivationSequenceStateMachine() {
   }

   /**
    * It loops if the data of the broker is still valuable, but cannot become live.
    * It loops (temporarily) if data is in sync or can self-heal, but cannot yet acquire the live lock.
    * <p>
    * It stops looping and returns:
    * <p><ul>
    * <li>{@code null}: if data is stale (and there are no rights to become live)
    * <li>{@code !=null}: if data is in sync and the {@link DistributedLock} is correctly acquired
    * </ul><p>
    * <p>
    * After successfully returning from this method ie a not-null return value, a broker should use
    * {@link #ensureSequentialAccessToNodeData(ActiveMQServer, DistributedPrimitiveManager, Logger)} to complete
    * the activation and guarantee the initial not-replicated ownership of data.
    */
   public static DistributedLock tryActivate(final String nodeId,
                                             final long nodeActivationSequence,
                                             final DistributedPrimitiveManager distributedManager,
                                             final Logger logger) throws InterruptedException, ExecutionException, TimeoutException, UnavailableStateException {
      // NOTE(review): if getMutableLong below throws, activationLock is not closed here —
      // assumed to be handled by the manager's lifecycle; confirm with the manager contract.
      final DistributedLock activationLock = distributedManager.getDistributedLock(nodeId);
      try (MutableLong coordinatedNodeSequence = distributedManager.getMutableLong(nodeId)) {
         while (true) {
            // dirty read is sufficient to know if we are *not* an in sync replica
            // typically the lock owner will increment to signal our data is stale and we are happy without any
            // further coordination at this point
            switch (validateActivationSequence(coordinatedNodeSequence, activationLock, nodeId, nodeActivationSequence, logger)) {

               case Stale:
                  activationLock.close();
                  return null;
               case SelfRepair:
               case InSync:
                  // fall through to the lock acquisition below
                  break;
               case MaybeInSync:
                  if (activationLock.tryLock()) {
                     // BAD: where's the broker that should commit it?
                     activationLock.unlock();
                     logger.warnf("Cannot assume live role for NodeID = %s: claimed activation sequence need to be repaired",
                                  nodeId);
                     TimeUnit.MILLISECONDS.sleep(CHECK_REPAIRED_ACTIVATION_SEQUENCE_WAIT_MILLIS);
                     continue;
                  }
                  // quick path while data is still valuable: wait until something change (commit/repair)
                  TimeUnit.MILLISECONDS.sleep(CHECK_ACTIVATION_SEQUENCE_WAIT_MILLIS);
                  continue;
            }
            // SelfRepair, InSync
            if (!activationLock.tryLock(LIVE_LOCK_ACQUIRE_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS)) {
               logger.debugf("Candidate for Node ID = %s, with local activation sequence: %d, cannot acquire live lock within %dms; retrying",
                             nodeId, nodeActivationSequence, LIVE_LOCK_ACQUIRE_TIMEOUT_MILLIS);
               continue;
            }
            // re-validate while holding the lock: a competing broker may have moved the sequence
            // between the dirty read above and the lock acquisition
            switch (validateActivationSequence(coordinatedNodeSequence, activationLock, nodeId, nodeActivationSequence, logger)) {

               case Stale:
                  activationLock.close();
                  return null;
               case SelfRepair:
                  // Self-repair sequence ie we were the only one with the most up to date data.
                  // NOTE: We cannot move the sequence now, let's delay it on ensureSequentialAccessToNodeData
                  logger.infof("Assuming live role for NodeID = %s: local activation sequence %d matches claimed coordinated activation sequence %d. Repairing sequence", nodeId, nodeActivationSequence, nodeActivationSequence);
                  return activationLock;
               case InSync:
                  // we are an in_sync_replica, good to go live as UNREPLICATED
                  logger.infof("Assuming live role for NodeID = %s, local activation sequence %d matches current coordinated activation sequence %d", nodeId, nodeActivationSequence, nodeActivationSequence);
                  return activationLock;
               case MaybeInSync:
                  activationLock.unlock();
                  logger.warnf("Cannot assume live role for NodeID = %s: claimed activation sequence need to be repaired", nodeId);
                  TimeUnit.MILLISECONDS.sleep(CHECK_REPAIRED_ACTIVATION_SEQUENCE_WAIT_MILLIS);
                  continue;
            }
         }
      }
   }

   private enum ValidationResult {
      /**
       * coordinated activation sequence (claimed/committed) is far beyond the local one: data is not valuable anymore
       **/
      Stale,
      /**
       * coordinated activation sequence is the same as local one: data is in sync
       **/
      InSync,
      /**
       * next coordinated activation sequence is not committed yet: maybe data is in sync
       **/
      MaybeInSync,
      /**
       * next coordinated activation sequence is not committed yet, but this broker can self-repair: data is in sync
       **/
      SelfRepair
   }

   /**
    * Classifies the local activation sequence against the coordinated one.
    * A negative coordinated value encodes a claimed-but-uncommitted sequence (see class doc).
    */
   private static ValidationResult validateActivationSequence(final MutableLong coordinatedNodeSequence,
                                                              final DistributedLock activationLock,
                                                              final String lockAndLongId,
                                                              final long nodeActivationSequence,
                                                              final Logger logger) throws UnavailableStateException {
      assert coordinatedNodeSequence.getMutableLongId().equals(lockAndLongId);
      assert activationLock.getLockId().equals(lockAndLongId);
      final long currentCoordinatedNodeSequence = coordinatedNodeSequence.get();
      if (nodeActivationSequence == currentCoordinatedNodeSequence) {
         return ValidationResult.InSync;
      }
      if (currentCoordinatedNodeSequence > 0) {
         // committed sequence differs from ours: our data is behind
         logger.infof("Not a candidate for NodeID = %s activation, local activation sequence %d does not match coordinated activation sequence %d",
                      lockAndLongId, nodeActivationSequence, currentCoordinatedNodeSequence);
         return ValidationResult.Stale;
      }
      // claimed activation sequence
      final long claimedCoordinatedNodeSequence = -currentCoordinatedNodeSequence;
      final long sequenceGap = claimedCoordinatedNodeSequence - nodeActivationSequence;
      if (sequenceGap == 0) {
         // we are the broker that claimed but never committed: we can repair it ourselves
         return ValidationResult.SelfRepair;
      }
      if (sequenceGap == 1) {
         // maybe data is still valuable
         return ValidationResult.MaybeInSync;
      }
      assert sequenceGap > 1;
      // sequence is moved so much that data is no longer valuable
      logger.infof("Not a candidate for NodeID = %s activation, local activation sequence %d does not match coordinated activation sequence %d",
                   lockAndLongId, nodeActivationSequence, claimedCoordinatedNodeSequence);
      return ValidationResult.Stale;
   }

   /**
    * It waits until {@code timeoutMills ms} have passed or the coordinated activation sequence has progressed enough.
    * If the waiting thread is interrupted, the interrupt status is restored and the method returns
    * the progress observed so far instead of swallowing the interruption.
    */
   public static boolean awaitNextCommittedActivationSequence(final DistributedPrimitiveManager distributedManager,
                                                              final String coordinatedLockAndNodeId,
                                                              final long activationSequence,
                                                              final long timeoutMills,
                                                              final Logger logger)
      throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
      Objects.requireNonNull(distributedManager);
      Objects.requireNonNull(logger);
      Objects.requireNonNull(coordinatedLockAndNodeId);
      if (activationSequence < 0) {
         throw new IllegalArgumentException("activationSequence must be >= 0, while is " + activationSequence);
      }
      if (!distributedManager.isStarted()) {
         throw new IllegalStateException("manager must be started");
      }
      final MutableLong coordinatedActivationSequence = distributedManager.getMutableLong(coordinatedLockAndNodeId);
      // wait for the live to activate and run unreplicated with a sequence > inSyncReplicaActivation
      // this read can be dirty b/c we are just looking for an increment.
      boolean anyNext = false;
      final long timeoutNs = TimeUnit.MILLISECONDS.toNanos(timeoutMills);
      final long started = System.nanoTime();
      long elapsedNs;
      do {
         final long coordinatedValue = coordinatedActivationSequence.get();
         if (coordinatedValue > activationSequence) {
            // all good, some activation has gone ahead
            logger.infof("Detected a new activation sequence with NodeID = %s: and sequence: %d", coordinatedLockAndNodeId, coordinatedValue);
            anyNext = true;
            break;
         }
         if (coordinatedValue < 0) {
            // commit claim
            final long claimedSequence = -coordinatedValue;
            final long activationsGap = claimedSequence - activationSequence;
            if (activationsGap > 1) {
               // all good, some activation has gone ahead
               logger.infof("Detected furthers sequential server activations from sequence %d, with NodeID = %s: and claimed sequence: %d", activationSequence, coordinatedLockAndNodeId, claimedSequence);
               anyNext = true;
               break;
            }
            // activation is still in progress
            logger.debugf("Detected claiming of activation sequence = %d for NodeID = %s", claimedSequence, coordinatedLockAndNodeId);
         }
         try {
            TimeUnit.MILLISECONDS.sleep(CHECK_ACTIVATION_SEQUENCE_WAIT_MILLIS);
         } catch (InterruptedException e) {
            // FIX: do not swallow the interruption — restore the flag and stop waiting,
            // letting the caller treat this as an elapsed timeout
            Thread.currentThread().interrupt();
            break;
         }
         elapsedNs = System.nanoTime() - started;
      }
      while (elapsedNs < timeoutNs);
      return anyNext;
   }

   /**
    * This is going to increment the coordinated activation sequence while holding the live lock, failing with some exception otherwise.<br>
    * <p>
    * The acceptable states are {@link ValidationResult#InSync} and {@link ValidationResult#SelfRepair}, throwing some exception otherwise.
    * <p>
    * This must be used while holding a live lock to ensure not-exclusive ownership of data ie can be both used
    * while losing connectivity with a replica or after successfully {@link #tryActivate(String, long, DistributedPrimitiveManager, Logger)}.
    */
   public static void ensureSequentialAccessToNodeData(ActiveMQServer activeMQServer,
                                                       DistributedPrimitiveManager distributedPrimitiveManager,
                                                       final Logger logger) throws ActiveMQException, InterruptedException, UnavailableStateException, ExecutionException, TimeoutException {
      final NodeManager nodeManager = activeMQServer.getNodeManager();
      final String lockAndLongId = nodeManager.getNodeId().toString();
      final DistributedLock liveLock = distributedPrimitiveManager.getDistributedLock(lockAndLongId);
      if (!liveLock.isHeldByCaller()) {
         final String message = String.format("Server [%s], live lock for NodeID = %s, not held, activation sequence cannot be safely changed",
                                              activeMQServer, lockAndLongId);
         logger.info(message);
         throw new UnavailableStateException(message);
      }
      final long nodeActivationSequence = nodeManager.readNodeActivationSequence();
      final MutableLong coordinatedNodeActivationSequence = distributedPrimitiveManager.getMutableLong(lockAndLongId);
      final long currentCoordinatedActivationSequence = coordinatedNodeActivationSequence.get();
      final long nextActivationSequence;
      if (currentCoordinatedActivationSequence < 0) {
         // Check Self-Repair
         if (nodeActivationSequence != -currentCoordinatedActivationSequence) {
            final String message = String.format("Server [%s], cannot assume live role for NodeID = %s, local activation sequence %d does not match current claimed coordinated sequence %d: need repair",
                                                 activeMQServer, lockAndLongId, nodeActivationSequence, -currentCoordinatedActivationSequence);
            logger.info(message);
            throw new ActiveMQException(message);
         }
         // auto-repair: this is the same server that failed to commit its claimed sequence
         nextActivationSequence = nodeActivationSequence;
      } else {
         // Check InSync
         if (nodeActivationSequence != currentCoordinatedActivationSequence) {
            final String message = String.format("Server [%s], cannot assume live role for NodeID = %s, local activation sequence %d does not match current coordinated sequence %d",
                                                 activeMQServer, lockAndLongId, nodeActivationSequence, currentCoordinatedActivationSequence);
            logger.info(message);
            throw new ActiveMQException(message);
         }
         nextActivationSequence = nodeActivationSequence + 1;
      }
      // UN_REPLICATED STATE ENTER: auto-repair doesn't need to claim and write locally
      if (nodeActivationSequence != nextActivationSequence) {
         // claim
         if (!coordinatedNodeActivationSequence.compareAndSet(nodeActivationSequence, -nextActivationSequence)) {
            final String message = String.format("Server [%s], cannot assume live role for NodeID = %s, activation sequence claim failed, local activation sequence %d no longer matches current coordinated sequence %d",
                                                 activeMQServer, lockAndLongId, nodeActivationSequence, coordinatedNodeActivationSequence.get());
            // FIX: use info(message), not infof(message) — the message is already formatted and any
            // '%' inside the server/node strings would be re-interpreted as a format directive
            logger.info(message);
            throw new ActiveMQException(message);
         }
         // claim success: write locally
         try {
            nodeManager.writeNodeActivationSequence(nextActivationSequence);
         } catch (NodeManager.NodeManagerException fatal) {
            logger.errorf("Server [%s] failed to set local activation sequence to: %d for NodeId =%s. Cannot continue committing coordinated activation sequence: REQUIRES ADMIN INTERVENTION",
                          activeMQServer, nextActivationSequence, lockAndLongId);
            throw new UnavailableStateException(fatal);
         }
         logger.infof("Server [%s], incremented local activation sequence to: %d for NodeId = %s",
                      activeMQServer, nextActivationSequence, lockAndLongId);
      } else {
         // self-heal need to update the in-memory sequence, because no writes will do it
         nodeManager.setNodeActivationSequence(nextActivationSequence);
      }
      // commit
      if (!coordinatedNodeActivationSequence.compareAndSet(-nextActivationSequence, nextActivationSequence)) {
         final String message = String.format("Server [%s], cannot assume live role for NodeID = %s, activation sequence commit failed, local activation sequence %d no longer matches current coordinated sequence %d",
                                              activeMQServer, lockAndLongId, nodeActivationSequence, coordinatedNodeActivationSequence.get());
         // FIX: info(message), not infof(message) — see claim failure above
         logger.info(message);
         throw new ActiveMQException(message);
      }
      logger.infof("Server [%s], incremented coordinated activation sequence to: %d for NodeId = %s",
                   activeMQServer, nextActivationSequence, lockAndLongId);
   }
}

View File

@ -244,7 +244,9 @@ public class ManagementServiceImpl implements ManagementService {
ObjectName objectName = objectNameBuilder.getActiveMQServerObjectName();
unregisterFromJMX(objectName);
unregisterFromRegistry(ResourceNames.BROKER);
unregisterMeters(ResourceNames.BROKER + "." + messagingServer.getConfiguration().getName());
if (messagingServer != null) {
unregisterMeters(ResourceNames.BROKER + "." + messagingServer.getConfiguration().getName());
}
}
@Override

View File

@ -2605,7 +2605,7 @@
</xsd:annotation>
<xsd:complexType>
<xsd:sequence>
<xsd:element name="data-source-property" type="dataSourcePropertyType" minOccurs="1" maxOccurs="unbounded">
<xsd:element name="data-source-property" type="propertyType" minOccurs="1" maxOccurs="unbounded">
<xsd:annotation>
<xsd:documentation>
A key-value pair option for the DataSource
@ -2682,7 +2682,7 @@
<xsd:attributeGroup ref="xml:specialAttrs"/>
</xsd:complexType>
<xsd:complexType name="dataSourcePropertyType">
<xsd:complexType name="propertyType">
<xsd:attribute name="key" type="xsd:string" use="required">
<xsd:annotation>
<xsd:documentation>
@ -2726,6 +2726,36 @@
<xsd:attributeGroup ref="xml:specialAttrs"/>
</xsd:complexType>
<xsd:complexType name="distributed-primitive-manager">
<xsd:all>
<xsd:element name="class-name" type="xsd:string" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
The distributed-primitive-manager class name
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="properties" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
A list of options for the distributed-primitive-manager
</xsd:documentation>
</xsd:annotation>
<xsd:complexType>
<xsd:sequence>
<xsd:element name="property" type="propertyType" minOccurs="1" maxOccurs="unbounded">
<xsd:annotation>
<xsd:documentation>
A key-value pair option for the distributed-primitive-manager
</xsd:documentation>
</xsd:annotation>
</xsd:element>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
</xsd:all>
</xsd:complexType>
<xsd:complexType name="haReplicationType">
<xsd:choice>
<xsd:element name="master" type="replicatedPolicyType" minOccurs="0" maxOccurs="1">
@ -2749,6 +2779,20 @@
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="primary" type="asyncPrimaryPolicyType" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
A primary server configured to replicate.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="backup" type="asyncBackupPolicyType" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
A backup server configured to replicate.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
</xsd:choice>
<xsd:attributeGroup ref="xml:specialAttrs"/>
</xsd:complexType>
@ -3119,6 +3163,125 @@
</xsd:all>
<xsd:attributeGroup ref="xml:specialAttrs"/>
</xsd:complexType>
<xsd:complexType name="asyncPrimaryPolicyType">
<xsd:all>
<xsd:element name="manager" type="distributed-primitive-manager" minOccurs="1" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
It's the manager used to manage the distributed locks used for this type of replication.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="group-name" type="xsd:string" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
used for replication, if set, (remote) backup servers will only pair with live servers with matching
group-name
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="cluster-name" type="xsd:string" maxOccurs="1" minOccurs="0">
<xsd:annotation>
<xsd:documentation>
Name of the cluster configuration to use for replication. This setting is only necessary in case you
configure multiple cluster connections. It is used by a replicating backups and by live servers that
may attempt fail-back.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="coordination-id" type="xsd:string" maxOccurs="1" minOccurs="0">
<xsd:annotation>
<xsd:documentation>
The common identity to use for coordination that is shared across instances that will replicate.
The value will be used as the internal server nodeId and as the identity of entities in the
distributed-primitive-manager.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="initial-replication-sync-timeout" type="xsd:long" default="30000" maxOccurs="1"
minOccurs="0">
<xsd:annotation>
<xsd:documentation>
The amount of time to wait for the replica to acknowledge it has received all the necessary data from
the replicating server at the final step of the initial replication synchronization process.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="retry-replication-wait" type="xsd:long" default="2000" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
If we start as a replica how long to wait (in milliseconds) before trying to replicate again after failing to find a replica
</xsd:documentation>
</xsd:annotation>
</xsd:element>
</xsd:all>
<xsd:attributeGroup ref="xml:specialAttrs"/>
</xsd:complexType>
<xsd:complexType name="asyncBackupPolicyType">
<xsd:all>
<xsd:element name="manager" type="distributed-primitive-manager" minOccurs="1" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
It's the manager used to manage the distributed locks used for this type of replication.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="group-name" type="xsd:string" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
used for replication, if set, (remote) backup servers will only pair with live servers with matching
group-name
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="cluster-name" type="xsd:string" maxOccurs="1" minOccurs="0">
<xsd:annotation>
<xsd:documentation>
Name of the cluster configuration to use for replication. This setting is only necessary in case you
configure multiple cluster connections. It is used by a replicating backups and by live servers that
may attempt fail-back.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="max-saved-replicated-journals-size" type="xsd:int" default="2" maxOccurs="1" minOccurs="0">
<xsd:annotation>
<xsd:documentation>
This specifies how many times a replicated backup server can restart after moving its files on start.
Once there are this number of backup journal files the server will stop permanently after it fails
back.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="allow-failback" type="xsd:boolean" default="true" maxOccurs="1" minOccurs="0">
<xsd:annotation>
<xsd:documentation>
Whether a server will automatically stop when another broker places a request to take over
its place. The use case is when a regular server stops and its backup takes over its
duties, later the main server restarts and requests the server (the former backup) to
stop operating.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="initial-replication-sync-timeout" type="xsd:long" default="30000" maxOccurs="1"
minOccurs="0">
<xsd:annotation>
<xsd:documentation>
If we have to start as a replicated server this is the amount of time to wait for the replica to
acknowledge it has received all the necessary data from the replicating server at the final step
of the initial replication synchronization process.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="retry-replication-wait" type="xsd:long" default="2000" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
How long to wait (in milliseconds) before trying to replicate again after failing to find a replica
</xsd:documentation>
</xsd:annotation>
</xsd:element>
</xsd:all>
<xsd:attributeGroup ref="xml:specialAttrs"/>
</xsd:complexType>
<xsd:complexType name="colocatedReplicaPolicyType">
<xsd:all>
<xsd:element name="group-name" type="xsd:string" minOccurs="0" maxOccurs="1">

View File

@ -17,7 +17,12 @@
package org.apache.activemq.artemis.core.config.impl;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.FileDeploymentManager;
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
@ -27,6 +32,8 @@ import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.LiveOnlyPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicaPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicatedPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ScaleDownPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreMasterPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreSlavePolicy;
@ -35,11 +42,19 @@ import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.ColocatedActivation;
import org.apache.activemq.artemis.core.server.impl.FileLockNodeManager;
import org.apache.activemq.artemis.core.server.impl.LiveOnlyActivation;
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
import org.apache.activemq.artemis.core.server.impl.ReplicationPrimaryActivation;
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
import org.apache.activemq.artemis.core.server.impl.SharedNothingLiveActivation;
import org.apache.activemq.artemis.core.server.impl.SharedStoreBackupActivation;
import org.apache.activemq.artemis.core.server.impl.SharedStoreLiveActivation;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.MutableLong;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
import org.hamcrest.MatcherAssert;
import org.hamcrest.core.IsInstanceOf;
import org.junit.Test;
import static org.hamcrest.CoreMatchers.instanceOf;
@ -124,6 +139,242 @@ public class HAPolicyConfigurationTest extends ActiveMQTestBase {
liveOnlyTest("live-only-hapolicy-config5.xml");
}
/**
 * Test-only in-memory {@link DistributedPrimitiveManager}: records the configuration it
 * was created with, tracks a started flag and hands out a single process-local
 * {@link DistributedLock}. Not thread-safe and provides no real coordination — it exists
 * so configuration-parsing tests can instantiate a manager without external services.
 */
public static class FakeDistributedPrimitiveManager implements DistributedPrimitiveManager {

   // configuration map passed by the factory, exposed for test assertions
   private final Map<String, String> config;
   private boolean started;
   // lazily-created singleton lock; reset to null on stop()
   private DistributedLock lock;

   public FakeDistributedPrimitiveManager(Map<String, String> config) {
      this.config = config;
      this.started = false;
   }

   public Map<String, String> getConfig() {
      return config;
   }

   @Override
   public void addUnavailableManagerListener(UnavailableManagerListener listener) {
      // no op
   }

   @Override
   public void removeUnavailableManagerListener(UnavailableManagerListener listener) {
      // no op
   }

   @Override
   public boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException {
      // always "succeeds" immediately, regardless of timeout
      started = true;
      return true;
   }

   @Override
   public void start() throws InterruptedException, ExecutionException {
      started = true;
   }

   @Override
   public boolean isStarted() {
      return started;
   }

   @Override
   public void stop() {
      // stopping closes (releases) the lock, mimicking a real manager losing its primitives
      started = false;
      if (lock != null) {
         lock.close();
      }
      lock = null;
   }

   @Override
   public DistributedLock getDistributedLock(String lockId) {
      if (!started) {
         throw new IllegalStateException("need to start first");
      }
      if (lock == null) {
         // non-reentrant, single-owner lock: tryLock succeeds only while not held
         lock = new DistributedLock() {

            private boolean held;

            @Override
            public String getLockId() {
               return lockId;
            }

            @Override
            public boolean isHeldByCaller() throws UnavailableStateException {
               return held;
            }

            @Override
            public boolean tryLock() throws UnavailableStateException, InterruptedException {
               if (held) {
                  return false;
               }
               held = true;
               return true;
            }

            @Override
            public void unlock() throws UnavailableStateException {
               held = false;
            }

            @Override
            public void addListener(UnavailableLockListener listener) {
               // listeners are never fired by this fake
            }

            @Override
            public void removeListener(UnavailableLockListener listener) {
               // listeners are never fired by this fake
            }

            @Override
            public void close() {
               held = false;
            }
         };
      } else if (!lock.getLockId().equals(lockId)) {
         // this fake supports exactly one lock id per manager instance
         throw new IllegalStateException("This shouldn't happen");
      }
      return lock;
   }

   @Override
   public MutableLong getMutableLong(String mutableLongId) throws InterruptedException, ExecutionException, TimeoutException {
      // TODO — not needed by the current tests; callers must tolerate null
      return null;
   }

   @Override
   public void close() {
      stop();
   }
}
/**
 * Asserts that the fake manager received exactly the seven key/value properties
 * declared in the {@code <manager><properties>} element of the test XML.
 */
private static void validateManagerConfig(Map<String, String> props) {
   // exactly the expected entries, nothing more
   assertEquals(7, props.size());
   assertEquals("127.0.0.1:6666", props.get("connect-string"));
   assertEquals("16000", props.get("session-ms"));
   assertEquals("2000", props.get("connection-ms"));
   assertEquals("2", props.get("retries"));
   assertEquals("2000", props.get("retries-ms"));
   assertEquals("test", props.get("namespace"));
   assertEquals("10", props.get("session-percent"));
}
/**
 * Loads "primary-hapolicy-config.xml" and verifies that a pluggable-quorum
 * {@code <replication><primary>} policy is built: the activation type, the primary
 * policy values, its failback companion backup policy, and the fake distributed
 * manager plus the configuration it received.
 */
@Test
public void PrimaryReplicationTest() throws Exception {
   final Configuration config = createConfiguration("primary-hapolicy-config.xml");
   final ActiveMQServerImpl broker = new ActiveMQServerImpl(config);
   try {
      broker.start();
      final Activation activation = broker.getActivation();
      assertTrue(activation instanceof ReplicationPrimaryActivation);
      final HAPolicy ha = broker.getHAPolicy();
      assertTrue(ha instanceof ReplicationPrimaryPolicy);
      final ReplicationPrimaryPolicy primary = (ReplicationPrimaryPolicy) ha;
      assertFalse(primary.isAllowAutoFailBack());
      assertEquals(9876, primary.getInitialReplicationSyncTimeout());
      assertFalse(primary.canScaleDown());
      assertFalse(primary.isBackup());
      assertFalse(primary.isSharedStore());
      assertTrue(primary.isWaitForActivation());
      assertEquals("purple", primary.getGroupName());
      assertEquals("purple", primary.getBackupGroupName());
      assertEquals("abcdefg", primary.getClusterName());
      assertFalse(primary.useQuorumManager());
      // check failback companion backup policy
      final ReplicationBackupPolicy failback = primary.getBackupPolicy();
      assertNotNull(failback);
      assertSame(primary, failback.getLivePolicy());
      assertEquals(primary.getGroupName(), failback.getGroupName());
      assertEquals(primary.getBackupGroupName(), failback.getBackupGroupName());
      assertEquals(primary.getClusterName(), failback.getClusterName());
      assertEquals(failback.getMaxSavedReplicatedJournalsSize(), ActiveMQDefaultConfiguration.getDefaultMaxSavedReplicatedJournalsSize());
      assertTrue(failback.isTryFailback());
      assertTrue(failback.isBackup());
      assertFalse(failback.isSharedStore());
      assertTrue(failback.isWaitForActivation());
      assertFalse(failback.useQuorumManager());
      assertEquals(12345, failback.getRetryReplicationWait());
      // scale-down must stay disabled on the failback policy
      assertFalse(failback.canScaleDown());
      assertNull(failback.getScaleDownClustername());
      assertNull(failback.getScaleDownGroupName());
      // the activation must use the fake manager declared in the XML...
      final DistributedPrimitiveManager manager = ((ReplicationPrimaryActivation) activation).getDistributedManager();
      assertNotNull(manager);
      assertEquals(FakeDistributedPrimitiveManager.class.getName(), manager.getClass().getName());
      MatcherAssert.assertThat(manager, instanceOf(FakeDistributedPrimitiveManager.class));
      // ...configured with the properties from the XML
      validateManagerConfig(((FakeDistributedPrimitiveManager) manager).getConfig());
   } finally {
      broker.stop();
   }
}
/**
 * Loads "backup-hapolicy-config.xml" and verifies that a pluggable-quorum
 * {@code <replication><backup>} policy is built: the activation type, the backup
 * policy values, its failover companion live policy, and the fake distributed
 * manager plus the configuration it received.
 */
@Test
public void BackupReplicationTest() throws Exception {
   final Configuration config = createConfiguration("backup-hapolicy-config.xml");
   final ActiveMQServerImpl broker = new ActiveMQServerImpl(config);
   try {
      broker.start();
      final Activation activation = broker.getActivation();
      assertTrue(activation instanceof ReplicationBackupActivation);
      final HAPolicy ha = broker.getHAPolicy();
      assertTrue(ha instanceof ReplicationBackupPolicy);
      final ReplicationBackupPolicy backup = (ReplicationBackupPolicy) ha;
      assertEquals("tiddles", backup.getGroupName());
      assertEquals("tiddles", backup.getBackupGroupName());
      assertEquals("33rrrrr", backup.getClusterName());
      assertEquals(22, backup.getMaxSavedReplicatedJournalsSize());
      assertFalse(backup.isTryFailback());
      assertTrue(backup.isBackup());
      assertFalse(backup.isSharedStore());
      assertTrue(backup.isWaitForActivation());
      assertFalse(backup.useQuorumManager());
      assertEquals(12345, backup.getRetryReplicationWait());
      // scale-down must stay disabled on the backup policy
      assertFalse(backup.canScaleDown());
      assertNull(backup.getScaleDownClustername());
      assertNull(backup.getScaleDownGroupName());
      // check failover companion live policy
      final ReplicationPrimaryPolicy failoverLive = backup.getLivePolicy();
      assertNotNull(failoverLive);
      assertSame(backup, failoverLive.getBackupPolicy());
      assertFalse(failoverLive.isAllowAutoFailBack());
      assertEquals(9876, failoverLive.getInitialReplicationSyncTimeout());
      assertFalse(failoverLive.canScaleDown());
      assertFalse(failoverLive.isBackup());
      assertFalse(failoverLive.isSharedStore());
      assertTrue(failoverLive.isWaitForActivation());
      assertEquals(backup.getGroupName(), failoverLive.getGroupName());
      assertEquals(backup.getClusterName(), failoverLive.getClusterName());
      assertEquals(backup.getBackupGroupName(), failoverLive.getBackupGroupName());
      assertFalse(failoverLive.useQuorumManager());
      // scale-down must stay disabled on the companion policy too
      assertFalse(failoverLive.canScaleDown());
      assertNull(failoverLive.getScaleDownClustername());
      assertNull(failoverLive.getScaleDownGroupName());
      // the activation must use the fake manager declared in the XML...
      final DistributedPrimitiveManager manager = ((ReplicationBackupActivation) activation).getDistributedManager();
      assertNotNull(manager);
      assertEquals(FakeDistributedPrimitiveManager.class.getName(), manager.getClass().getName());
      MatcherAssert.assertThat(manager, instanceOf(FakeDistributedPrimitiveManager.class));
      // ...configured with the properties from the XML
      validateManagerConfig(((FakeDistributedPrimitiveManager) manager).getConfig());
   } finally {
      broker.stop();
   }
}
@Test
public void ReplicatedTest() throws Exception {
Configuration configuration = createConfiguration("replicated-hapolicy-config.xml");

View File

@ -113,6 +113,7 @@ import org.apache.activemq.artemis.core.remoting.impl.invm.TransportConstants;
import org.apache.activemq.artemis.core.remoting.impl.netty.NettyAcceptorFactory;
import org.apache.activemq.artemis.core.remoting.impl.netty.NettyConnector;
import org.apache.activemq.artemis.core.remoting.impl.netty.NettyConnectorFactory;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.server.ActiveMQComponent;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
@ -129,6 +130,7 @@ import org.apache.activemq.artemis.core.server.impl.Activation;
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.AddressInfo;
import org.apache.activemq.artemis.core.server.impl.LiveOnlyActivation;
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
import org.apache.activemq.artemis.core.settings.impl.AddressFullMessagePolicy;
import org.apache.activemq.artemis.core.settings.impl.AddressSettings;
@ -1384,6 +1386,8 @@ public abstract class ActiveMQTestBase extends Assert {
if (isReplicated) {
if (activation instanceof SharedNothingBackupActivation) {
isRemoteUpToDate = backup.isReplicaSync();
} else if (activation instanceof ReplicationBackupActivation) {
isRemoteUpToDate = backup.isReplicaSync();
} else {
//we may have already failed over and changed the Activation
if (actualServer.isStarted()) {
@ -2517,6 +2521,17 @@ public abstract class ActiveMQTestBase extends Assert {
return !hadToInterrupt;
}
/**
 * Returns the backup's {@link ReplicationEndpoint} for both the classic and the
 * pluggable-quorum backup activations, or {@code null} when the server's activation
 * is neither kind of replicating backup.
 */
protected static ReplicationEndpoint getReplicationEndpoint(ActiveMQServer server) {
   final Activation activation = server.getActivation();
   // classic replication backup
   if (activation instanceof SharedNothingBackupActivation) {
      return ((SharedNothingBackupActivation) activation).getReplicationEndpoint();
   }
   // pluggable quorum replication backup, otherwise not a replicating backup at all
   return activation instanceof ReplicationBackupActivation ?
      ((ReplicationBackupActivation) activation).getReplicationEndpoint() : null;
}
// Private -------------------------------------------------------
// Inner classes -------------------------------------------------

View File

@ -0,0 +1,52 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration
xmlns="urn:activemq"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="urn:activemq /schema/artemis-server.xsd">
<core xmlns="urn:activemq:core">
<discovery-groups>
<discovery-group name="wahey"/>
</discovery-groups>
<ha-policy>
<replication>
<backup>
<group-name>tiddles</group-name>
<max-saved-replicated-journals-size>22</max-saved-replicated-journals-size>
<cluster-name>33rrrrr</cluster-name>
<initial-replication-sync-timeout>9876</initial-replication-sync-timeout>
<retry-replication-wait>12345</retry-replication-wait>
<allow-failback>false</allow-failback>
<manager>
<class-name>
org.apache.activemq.artemis.core.config.impl.HAPolicyConfigurationTest$FakeDistributedPrimitiveManager
</class-name>
<properties>
<property key="connect-string" value="127.0.0.1:6666"/>
<property key="session-ms" value="16000"/>
<property key="connection-ms" value="2000"/>
<property key="retries" value="2"/>
<property key="retries-ms" value="2000"/>
<property key="namespace" value="test"/>
<property key="session-percent" value="10"/>
</properties>
</manager>
</backup>
</replication>
</ha-policy>
</core>
</configuration>

View File

@ -0,0 +1,49 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration
xmlns="urn:activemq"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="urn:activemq /schema/artemis-server.xsd">
<core xmlns="urn:activemq:core">
<ha-policy>
<replication>
<primary>
<group-name>purple</group-name>
<cluster-name>abcdefg</cluster-name>
<initial-replication-sync-timeout>9876</initial-replication-sync-timeout>
<retry-replication-wait>12345</retry-replication-wait>
<manager>
<class-name>
org.apache.activemq.artemis.core.config.impl.HAPolicyConfigurationTest$FakeDistributedPrimitiveManager
</class-name>
<properties>
<property key="connect-string" value="127.0.0.1:6666"/>
<property key="session-ms" value="16000"/>
<property key="connection-ms" value="2000"/>
<property key="retries" value="2"/>
<property key="retries-ms" value="2000"/>
<property key="namespace" value="test"/>
<property key="session-percent" value="10"/>
</properties>
</manager>
</primary>
</replication>
</ha-policy>
</core>
</configuration>

View File

@ -65,8 +65,8 @@ which we will cover in a later chapter.
> message data will not be available after failover.
The `ha-policy` type configures which strategy a cluster should use to
provide the backing up of a servers data. Within this configuration
element is configured how a server should behave within the cluster,
provide the backing up of a server's data. Within this configuration
element we configure how a server should behave within the cluster,
either as a master (live), slave (backup) or colocated (both live and
backup). This would look something like:
@ -98,6 +98,33 @@ or
</ha-policy>
```
*Replication* allows the configuration of two new roles to enable *pluggable quorum* provider configuration, by using:
```xml
<ha-policy>
<replication>
<primary/>
</replication>
</ha-policy>
```
to configure the classic *master* role, and
```xml
<ha-policy>
<replication>
<backup/>
</replication>
</ha-policy>
```
for the classic *slave* one.
If *replication* is configured using such new roles, some additional elements are required to complete the configuration, as detailed later.
### IMPORTANT NOTE ON PLUGGABLE QUORUM VOTE FEATURE
This feature is still **EXPERIMENTAL**. Extra testing should be done before running this feature into production. Please raise issues eventually found to the ActiveMQ Artemis Mail Lists.
It means:
- its configuration can change until declared as **officially stable**
### Data Replication
When using replication, the live and the backup servers do not share the
@ -196,19 +223,29 @@ changes and repeats the process.
> live server by changing `slave` to `master`.
Much like in the shared-store case, when the live server stops or
crashes, its replicating backup will become active and take over its
crashes, its replicating backup will become active and take over its
duties. Specifically, the backup will become active when it loses
connection to its live server. This can be problematic because this can
also happen because of a temporary network problem. In order to address
this issue, the backup will try to determine whether it still can
connection to its live server. This can be problematic because it can
also happen as the result of a temporary network problem.
The issue can be solved in two different ways, depending on which replication roles are configured:
- **classic replication** (`master`/`slave` roles): backup will try to determine whether it still can
connect to the other servers in the cluster. If it can connect to more
than half the servers, it will become active, if more than half the
servers also disappeared with the live, the backup will wait and try
reconnecting with the live. This avoids a split brain situation.
- **pluggable quorum vote replication** (`primary`/`backup` roles): backup relies on a pluggable quorum provider
(configurable via `manager` xml element) to detect if there's any active live.
> ***NOTE***
>
> A backup in the **pluggable quorum vote replication** still needs to carefully configure
> [connection-ttl](connection-ttl.md) in order to promptly issue a request to become live to the quorum service
> before failing-over.
#### Configuration
To configure the live and backup servers to be a replicating pair,
To configure a classic replication's live and backup servers to be a replicating pair,
configure the live server in ' `broker.xml` to have:
```xml
@ -235,6 +272,30 @@ The backup server must be similarly configured but as a `slave`
</ha-policy>
```
To configure a pluggable quorum replication's primary and backup use:
```xml
<ha-policy>
<replication>
<primary/>
</replication>
</ha-policy>
...
<cluster-connections>
<cluster-connection name="my-cluster">
...
</cluster-connection>
</cluster-connections>
```
and
```xml
<ha-policy>
<replication>
<backup/>
</replication>
</ha-policy>
```
#### All Replication Configuration
The following table lists all the `ha-policy` configuration elements for
@ -308,6 +369,130 @@ replica to acknowledge it has received all the necessary data. The
default is 30,000 milliseconds. **Note:** during this interval any
journal related operations will be blocked.
#### Pluggable Quorum Vote Replication configurations
Pluggable Quorum Vote replication configuration options are a bit different
from classic replication, mostly because of its customizable nature.
[Apache curator](https://curator.apache.org/) is used by the default quorum provider.
Below some example configurations to show how it works.
For `primary`:
```xml
<ha-policy>
<replication>
<primary>
<manager>
<class-name>org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager</class-name>
<properties>
<property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
</properties>
</manager>
</primary>
</replication>
</ha-policy>
```
And `backup`:
```xml
<ha-policy>
<replication>
<backup>
<manager>
<class-name>org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager</class-name>
<properties>
<property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
</properties>
</manager>
<allow-failback>true</allow-failback>
</backup>
</replication>
</ha-policy>
```
The configuration of `class-name` as follows
```xml
<class-name>org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager</class-name>
```
isn't really needed, because Apache Curator is the default provider, but has been shown for completeness.
The `properties` element:
```xml
<properties>
<property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
</properties>
```
can specify a list of `property` elements in the form of key-value pairs, appropriate to what is
supported by the specified `class-name` provider.
Apache Curator's provider allows the following properties:
- [`connect-string`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#connectString(java.lang.String)): (no default)
- [`session-ms`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#sessionTimeoutMs(int)): (default is 18000 ms)
- [`session-percent`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#simulatedSessionExpirationPercent(int)): (default is 33); should be <= default,
see https://cwiki.apache.org/confluence/display/CURATOR/TN14 for more info
- [`connection-ms`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#connectionTimeoutMs(int)): (default is 8000 ms)
- [`retries`](https://curator.apache.org/apidocs/org/apache/curator/retry/RetryNTimes.html#%3Cinit%3E(int,int)): (default is 1)
- [`retries-ms`](https://curator.apache.org/apidocs/org/apache/curator/retry/RetryNTimes.html#%3Cinit%3E(int,int)): (default is 1000 ms)
- [`namespace`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#namespace(java.lang.String)): (no default)
Configuration of the [Apache Zookeeper](https://zookeeper.apache.org/) ensemble is the responsibility of the user, but there are few
**suggestions to improve the reliability of the quorum service**:
- broker `session_ms` must be `>= 2 * server tick time` and `<= 20 * server tick time` as by
[Zookeeper 3.6.3 admin guide](https://zookeeper.apache.org/doc/r3.6.3/zookeeperAdmin.html): it directly impacts how fast a backup
can failover to an isolated/killed/unresponsive live; the higher, the slower.
- GC on broker machine should allow keeping GC pauses within 1/3 of `session_ms` in order to let the Zookeeper heartbeat protocol
work reliably. If that is not possible, it is better to increase `session_ms`, accepting a slower failover.
- Zookeeper must have enough resources to keep GC (and OS) pauses much smaller than server tick time: please consider carefully if
broker and Zookeeper node should share the same physical machine, depending on the expected load of the broker
- network isolation protection requires configuring >=3 Zookeeper nodes
#### *Important*: Notes on pluggable quorum replication configuration
There are some no longer needed `classic` replication configurations:
- `vote-on-replication-failure`
- `quorum-vote-wait`
- `vote-retries`
- `vote-retries-wait`
- `check-for-live-server`
**Notes on replication configuration with [Apache curator](https://curator.apache.org/) quorum provider**
As said some paragraphs above, `session-ms` affect the failover duration: a backup can
failover after `session-ms` expires or if the live broker voluntarily gives up its role,
e.g. during a fail-back/manual broker stop, in which case it happens immediately.
For the former case (session expiration with live no longer present), the backup broker can detect an unresponsive live by using:
1. cluster connection PINGs (affected by [connection-ttl](connection-ttl.md) tuning)
2. closed TCP connection notification (depends by TCP configuration and networking stack/topology)
The suggestion is to tune `connection-ttl` low enough to attempt failover as soon as possible, while taking in consideration that
the whole fail-over duration cannot last less than the configured `session-ms`.
##### Peer or Multi Primary
With coordination delegated to the quorum service, roles are less important. It is possible to have two peer servers compete
for activation; the winner activating as live, the loser taking up a backup role. On restart, 'any' peer server
with the most up to date journal can activate.
The instances need to know in advance, what identity they will coordinate on.
In the replication 'primary' ha policy we can explicitly set the 'coordination-id' to a common value for all peers in a cluster.
For `multi primary`:
```xml
<ha-policy>
<replication>
<primary>
<manager>
<class-name>org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager</class-name>
<properties>
<property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
</properties>
</manager>
<coordination-id>peer-journal-001</coordination-id>
</primary>
</replication>
</ha-policy>
```
Note: the string value provided will be converted internally into a 16 byte UUID, so it may not be immediately recognisable or human-readable,
however it will ensure that all 'peers' coordinate.
### Shared Store
When using a shared store, both live and backup servers share the *same*
@ -406,19 +591,43 @@ stop. This configuration would look like:
</ha-policy>
```
In replication HA mode you need to set an extra property
`check-for-live-server` to `true` in the `master` configuration. If set
The same configuration option can be set for both replications, classic:
```xml
<ha-policy>
<replication>
<slave>
<allow-failback>true</allow-failback>
</slave>
</replication>
</ha-policy>
```
and with pluggable quorum provider:
```xml
<ha-policy>
<replication>
<manager>
<!-- some meaningful configuration -->
</manager>
<backup>
<allow-failback>true</allow-failback>
</backup>
</replication>
</ha-policy>
```
In both replication HA modes you need to set an extra property
`check-for-live-server` to `true` in the `master`/`primary` configuration. If set
to true, during start-up a live server will first search the cluster for
another server using its nodeID. If it finds one, it will contact this
server and try to "fail-back". Since this is a remote replication
scenario, the "starting live" will have to synchronize its data with the
server running with its ID, once they are in sync, it will request the
other server (which it assumes it is a back that has assumed its duties)
to shutdown for it to take over. This is necessary because otherwise the
other server (which it assumes it is a backup that has assumed its duties)
to shutdown, for it to take over. This is necessary because otherwise the
live server has no means to know whether there was a fail-over or not,
and if there was if the server that took its duties is still running or
and if there was, if the server that took its duties is still running or
not. To configure this option at your `broker.xml`
configuration file as follows:
configuration file as follows, for classic replication:
```xml
<ha-policy>
@ -430,7 +639,30 @@ configuration file as follows:
</ha-policy>
```
> **Warning**
And pluggable quorum replication:
```xml
<ha-policy>
<replication>
<manager>
<!-- some meaningful configuration -->
</manager>
<primary>
<!-- no need to check-for-live-server anymore -->
</primary>
</replication>
</ha-policy>
```
The key difference from classic replication is that if `master` cannot reach any
live server with its nodeID, it activates unilaterally.
With `primary`, the responsibilities of coordination are delegated to the quorum provider,
there are no unilateral decisions. The `primary` will only activate when
it knows that it has the most up to date version of the journal identified by its nodeID.
In short: a started `primary` cannot become live without consensus.
> **Warning for classic replication**
>
> Be aware that if you restart a live server while after failover has
> occurred then `check-for-live-server` must be set to `true`. If not the live server

View File

@ -62,6 +62,7 @@ under the License.
<module>scale-down</module>
<module>stop-server-failover</module>
<module>transaction-failover</module>
<module>zookeeper-single-pair-failback</module>
</modules>
</profile>
<profile>
@ -81,6 +82,7 @@ under the License.
<module>replicated-transaction-failover</module>
<module>scale-down</module>
<module>transaction-failover</module>
<module>zookeeper-single-pair-failback</module>
</modules>
</profile>
</profiles>

View File

@ -0,0 +1,110 @@
<?xml version='1.0'?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.activemq.examples.failover</groupId>
<artifactId>broker-failover</artifactId>
<version>2.18.0-SNAPSHOT</version>
</parent>
<artifactId>zookeeper-single-pair-ordered-failback</artifactId>
<packaging>jar</packaging>
<name>ActiveMQ Artemis Zookeeper Single Pair Ordered Failback Example</name>
<properties>
<activemq.basedir>${project.basedir}/../../../..</activemq.basedir>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-cli</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>jakarta.jms</groupId>
<artifactId>jakarta.jms-api</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-maven-plugin</artifactId>
<executions>
<execution>
<id>create0</id>
<goals>
<goal>create</goal>
</goals>
<configuration>
<!-- this makes it easier in certain envs -->
<javaOptions>-Djava.net.preferIPv4Stack=true</javaOptions>
<instance>${basedir}/target/server0</instance>
<configuration>${basedir}/target/classes/activemq/server0</configuration>
<javaOptions>-Dudp-address=${udp-address}</javaOptions>
</configuration>
</execution>
<execution>
<id>create1</id>
<goals>
<goal>create</goal>
</goals>
<configuration>
<!-- this makes it easier in certain envs -->
<javaOptions>-Djava.net.preferIPv4Stack=true</javaOptions>
<instance>${basedir}/target/server1</instance>
<configuration>${basedir}/target/classes/activemq/server1</configuration>
<javaOptions>-Dudp-address=${udp-address}</javaOptions>
</configuration>
</execution>
<execution>
<id>runClient</id>
<goals>
<goal>runClient</goal>
</goals>
<configuration>
<clientClass>org.apache.activemq.artemis.jms.example.ZookeeperSinglePairFailback</clientClass>
<args>
<param>${basedir}/target/server0</param>
<param>${basedir}/target/server1</param>
</args>
</configuration>
</execution>
</executions>
<dependencies>
<dependency>
<groupId>org.apache.activemq.examples.failover</groupId>
<artifactId>zookeeper-single-pair-ordered-failback</artifactId>
<version>2.18.0-SNAPSHOT</version>
</dependency>
</dependencies>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-clean-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,179 @@
# Zookeeper Single Pair Failback Example
## Configuring Zookeeper with Docker
This example demonstrates two servers coupled as a primary-backup pair for high availability (HA) using
pluggable quorum vote replication Reference Implementation based on [Apache Curator](https://curator.apache.org/) to use
[Apache Zookeeper](https://zookeeper.apache.org/) as external quorum service.
The example shows a client connection failing over from the live broker to the backup when the live broker is crashed,
and then back to the original live broker when it is restarted (i.e. "failback").
To run the example, simply type **mvn verify** from this directory after running a Zookeeper node at `localhost:2181`.
If no Zookeeper node is configured, you can use the commands below (see the [Official Zookeeper Docker Image Site](https://hub.docker.com/_/zookeeper)
for more details on how to configure it).
Run Zookeeper `3.6.3` with:
```
$ docker run --name artemis-zk --network host --restart always -d zookeeper:3.6.3
```
By default, the official docker image exposes `2181 2888 3888 8080` as client, follower, election and AdminServer ports.
Verify Zookeeper server is correctly started by running:
```
$ docker logs --follow artemis-zk
```
It should print the Zookeeper welcome ASCII logs:
```
ZooKeeper JMX enabled by default
Using config: /conf/zoo.cfg
2021-08-05 14:29:29,431 [myid:] - INFO [main:QuorumPeerConfig@174] - Reading configuration from: /conf/zoo.cfg
2021-08-05 14:29:29,434 [myid:] - INFO [main:QuorumPeerConfig@451] - clientPort is not set
2021-08-05 14:29:29,434 [myid:] - INFO [main:QuorumPeerConfig@464] - secureClientPort is not set
2021-08-05 14:29:29,434 [myid:] - INFO [main:QuorumPeerConfig@480] - observerMasterPort is not set
2021-08-05 14:29:29,435 [myid:] - INFO [main:QuorumPeerConfig@497] - metricsProvider.className is org.apache.zookeeper.metrics.impl.DefaultMetricsProvider
2021-08-05 14:29:29,438 [myid:] - ERROR [main:QuorumPeerConfig@722] - Invalid configuration, only one server specified (ignoring)
2021-08-05 14:29:29,441 [myid:1] - INFO [main:DatadirCleanupManager@78] - autopurge.snapRetainCount set to 3
2021-08-05 14:29:29,441 [myid:1] - INFO [main:DatadirCleanupManager@79] - autopurge.purgeInterval set to 0
2021-08-05 14:29:29,441 [myid:1] - INFO [main:DatadirCleanupManager@101] - Purge task is not scheduled.
2021-08-05 14:29:29,441 [myid:1] - WARN [main:QuorumPeerMain@138] - Either no config or no quorum defined in config, running in standalone mode
2021-08-05 14:29:29,444 [myid:1] - INFO [main:ManagedUtil@44] - Log4j 1.2 jmx support found and enabled.
2021-08-05 14:29:29,449 [myid:1] - INFO [main:QuorumPeerConfig@174] - Reading configuration from: /conf/zoo.cfg
2021-08-05 14:29:29,449 [myid:1] - INFO [main:QuorumPeerConfig@451] - clientPort is not set
2021-08-05 14:29:29,449 [myid:1] - INFO [main:QuorumPeerConfig@464] - secureClientPort is not set
2021-08-05 14:29:29,449 [myid:1] - INFO [main:QuorumPeerConfig@480] - observerMasterPort is not set
2021-08-05 14:29:29,450 [myid:1] - INFO [main:QuorumPeerConfig@497] - metricsProvider.className is org.apache.zookeeper.metrics.impl.DefaultMetricsProvider
2021-08-05 14:29:29,450 [myid:1] - ERROR [main:QuorumPeerConfig@722] - Invalid configuration, only one server specified (ignoring)
2021-08-05 14:29:29,451 [myid:1] - INFO [main:ZooKeeperServerMain@122] - Starting server
2021-08-05 14:29:29,459 [myid:1] - INFO [main:ServerMetrics@62] - ServerMetrics initialized with provider org.apache.zookeeper.metrics.impl.DefaultMetricsProvider@525f1e4e
2021-08-05 14:29:29,461 [myid:1] - INFO [main:FileTxnSnapLog@124] - zookeeper.snapshot.trust.empty : false
2021-08-05 14:29:29,467 [myid:1] - INFO [main:ZookeeperBanner@42] -
2021-08-05 14:29:29,467 [myid:1] - INFO [main:ZookeeperBanner@42] - ______ _
2021-08-05 14:29:29,467 [myid:1] - INFO [main:ZookeeperBanner@42] - |___ / | |
2021-08-05 14:29:29,467 [myid:1] - INFO [main:ZookeeperBanner@42] - / / ___ ___ | | __ ___ ___ _ __ ___ _ __
2021-08-05 14:29:29,468 [myid:1] - INFO [main:ZookeeperBanner@42] - / / / _ \ / _ \ | |/ / / _ \ / _ \ | '_ \ / _ \ | '__|
2021-08-05 14:29:29,468 [myid:1] - INFO [main:ZookeeperBanner@42] - / /__ | (_) | | (_) | | < | __/ | __/ | |_) | | __/ | |
2021-08-05 14:29:29,468 [myid:1] - INFO [main:ZookeeperBanner@42] - /_____| \___/ \___/ |_|\_\ \___| \___| | .__/ \___| |_|
2021-08-05 14:29:29,468 [myid:1] - INFO [main:ZookeeperBanner@42] - | |
2021-08-05 14:29:29,468 [myid:1] - INFO [main:ZookeeperBanner@42] - |_|
2021-08-05 14:29:29,468 [myid:1] - INFO [main:ZookeeperBanner@42] -
```
Alternatively, this command could be executed:
```
$ docker run -it --rm --network host zookeeper:3.6.3 zkCli.sh -server localhost:2181
```
The Zookeeper server can be reached at localhost:2181 if the client outputs something like:
```
2021-08-05 14:56:03,739 [myid:localhost:2181] - INFO [main-SendThread(localhost:2181):ClientCnxn$SendThread@1448] - Session establishment complete on server localhost/0:0:0:0:0:0:0:1:2181, session id = 0x100078b8cfc0002, negotiated timeout = 30000
```
Type
```
[zk: localhost:2181(CONNECTED) 0] quit
```
to quit the client instance.
## Configuring Zookeeper on bare metal
It is possible to run ZooKeeper on a bare-metal instance for this example as well.
Simply download [Zookeeper](https://zookeeper.apache.org/releases.html), and use the following zoo.cfg under ./apache-zookeeper/conf:
```shell
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/tmp/datazookeeper
# the port at which the clients will connect
clientPort=2181
```
And use one of the shells to start Zookeeper such as:
```shell
# From the bin folder under the apache-zookeeper distribution folder
$ ./zkServer.sh start-foreground
```
And zookeeper would run normally:
```
2021-08-05 14:10:16,902 [myid:] - INFO [main:DigestAuthenticationProvider@47] - ACL digest algorithm is: SHA1
2021-08-05 14:10:16,902 [myid:] - INFO [main:DigestAuthenticationProvider@61] - zookeeper.DigestAuthenticationProvider.enabled = true
2021-08-05 14:10:16,905 [myid:] - INFO [main:FileTxnSnapLog@124] - zookeeper.snapshot.trust.empty : false
2021-08-05 14:10:16,917 [myid:] - INFO [main:ZookeeperBanner@42] -
2021-08-05 14:10:16,917 [myid:] - INFO [main:ZookeeperBanner@42] - ______ _
2021-08-05 14:10:16,917 [myid:] - INFO [main:ZookeeperBanner@42] - |___ / | |
2021-08-05 14:10:16,917 [myid:] - INFO [main:ZookeeperBanner@42] - / / ___ ___ | | __ ___ ___ _ __ ___ _ __
2021-08-05 14:10:16,917 [myid:] - INFO [main:ZookeeperBanner@42] - / / / _ \ / _ \ | |/ / / _ \ / _ \ | '_ \ / _ \ | '__|
2021-08-05 14:10:16,917 [myid:] - INFO [main:ZookeeperBanner@42] - / /__ | (_) | | (_) | | < | __/ | __/ | |_) | | __/ | |
2021-08-05 14:10:16,918 [myid:] - INFO [main:ZookeeperBanner@42] - /_____| \___/ \___/ |_|\_\ \___| \___| | .__/ \___| |_|
2021-08-05 14:10:16,918 [myid:] - INFO [main:ZookeeperBanner@42] - | |
2021-08-05 14:10:16,918 [myid:] - INFO [main:ZookeeperBanner@42] - |_|
2021-08-05 14:10:16,918 [myid:] - INFO [main:ZookeeperBanner@42] -
```
## Configuring the brokers
The 2 brokers of this example are already configured to connect to a single Zookeeper node at the mentioned address, thanks to the XML configuration of their `manager`:
```xml
<manager>
<properties>
<property key="connect-string" value="localhost:2181"/>
<property key="namespace" value="examples"/>
<property key="session-ms" value="18000"/>
</properties>
</manager>
```
**NOTE** the `namespace` parameter is used to separate the pair information from others if the Zookeeper node is shared with other applications.
**WARNING** As already recommended on the [High Availability section](https://activemq.apache.org/components/artemis/documentation/latest/ha.html), a production environment needs >= 3 nodes to protect against network partitions.
## Running the example
After Zookeeper is started accordingly to any of the portrayed steps here, this example can be run with
```shell
$ mvn verify
```
```
ZookeeperSinglePairFailback-primary-out:2021-08-05 14:15:50,052 INFO [org.apache.activemq.artemis.core.server] AMQ221020: Started KQUEUE Acceptor at localhost:61616 for protocols [CORE,MQTT,AMQP,HORNETQ,STOMP,OPENWIRE]
server tcp://localhost:61616 started
Started primary
Got message: This is text message 20 (redelivered?: false)
Got message: This is text message 21 (redelivered?: false)
Got message: This is text message 22 (redelivered?: false)
Got message: This is text message 23 (redelivered?: false)
Got message: This is text message 24 (redelivered?: false)
Got message: This is text message 25 (redelivered?: false)
Got message: This is text message 26 (redelivered?: false)
Got message: This is text message 27 (redelivered?: false)
Got message: This is text message 28 (redelivered?: false)
Got message: This is text message 29 (redelivered?: false)
Acknowledged 3d third of messages
**********************************
Killing server java.lang.UNIXProcess@dd025d9
**********************************
**********************************
Killing server java.lang.UNIXProcess@3bea478e
**********************************
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 36.629 s
[INFO] Finished at: 2021-08-05T14:15:56-04:00
[INFO] ------------------------------------------------------------------------
clebertsuconic@MacBook-Pro zookeeper-single-pair-failback %
```

View File

@ -0,0 +1,157 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.jms.example;
import javax.jms.Connection;
import javax.jms.ConnectionFactory;
import javax.jms.JMSException;
import javax.jms.MessageConsumer;
import javax.jms.MessageProducer;
import javax.jms.Queue;
import javax.jms.Session;
import javax.jms.TextMessage;
import javax.naming.InitialContext;
import org.apache.activemq.artemis.util.ServerUtil;
/**
* Example of live and replicating backup pair using Zookeeper as the quorum provider.
* <p>
* After both servers are started, the live server is killed and the backup becomes active ("fails-over").
* <p>
* Later the live server is restarted and takes back its position by asking the backup to stop ("fail-back").
*/
public class ZookeeperSinglePairFailback {

   // Handles to the two externally-spawned broker processes so they can be killed on teardown.
   private static Process server0;
   private static Process server1;

   /**
    * Runs the failback demonstration end to end: starts the primary/backup pair,
    * sends and partially acknowledges messages, kills the primary to force failover,
    * restarts it to trigger failback, and verifies consumption continues throughout.
    *
    * @param args args[0] = path to the primary server instance directory,
    *             args[1] = path to the backup server instance directory
    * @throws Exception on any JMS, JNDI, or process-management failure
    */
   public static void main(final String[] args) throws Exception {
      // Step 0. Prepare Zookeeper Environment as shown on readme.md
      final int numMessages = 30;
      Connection connection = null;
      InitialContext initialContext = null;
      try {
         // Step 1. Start the primary and backup broker processes.
         // The longer (30s) timeout for the primary covers initial store creation;
         // the backup only needs to announce itself (10s).
         server0 = ServerUtil.startServer(args[0], ZookeeperSinglePairFailback.class.getSimpleName() + "-primary", 0, 30000);
         server1 = ServerUtil.startServer(args[1], ZookeeperSinglePairFailback.class.getSimpleName() + "-backup", 1, 10000);
         // Step 2. Get an initial context for looking up JNDI from the server #1
         initialContext = new InitialContext();
         // Step 3. Look up the JMS resources from JNDI
         Queue queue = (Queue) initialContext.lookup("queue/exampleQueue");
         ConnectionFactory connectionFactory = (ConnectionFactory) initialContext.lookup("ConnectionFactory");
         // Step 4. Create a JMS Connection
         connection = connectionFactory.createConnection();
         // Step 5. Create a *non-transacted* JMS Session with client acknowledgement
         Session session = connection.createSession(false, Session.CLIENT_ACKNOWLEDGE);
         // Step 6. Start the connection to ensure delivery occurs
         connection.start();
         // Step 7. Create a JMS MessageProducer and a MessageConsumer
         MessageProducer producer = session.createProducer(queue);
         MessageConsumer consumer = session.createConsumer(queue);
         // Step 8. Send some messages to server #1, the live server
         for (int i = 0; i < numMessages; i++) {
            TextMessage message = session.createTextMessage("This is text message " + i);
            producer.send(message);
            System.out.println("Sent message: " + message.getText());
         }
         // Step 9. Receive and acknowledge a third of the sent messages.
         // With CLIENT_ACKNOWLEDGE, acknowledging one message acknowledges all
         // previously received messages on this session.
         TextMessage message0 = null;
         for (int i = 0; i < numMessages / 3; i++) {
            message0 = (TextMessage) consumer.receive(5000);
            System.out.println("Got message: " + message0.getText());
         }
         message0.acknowledge();
         System.out.println("Received and acknowledged a third of the sent messages");
         // Step 10. Receive the remaining two thirds of the sent messages but *do not* acknowledge them yet
         for (int i = numMessages / 3; i < numMessages; i++) {
            message0 = (TextMessage) consumer.receive(5000);
            System.out.println("Got message: " + message0.getText());
         }
         System.out.println("Received without acknowledged the rest of the sent messages");
         Thread.sleep(2000);
         // Step 11. Crash server #0, the live server, and wait a little while to make sure
         // it has really crashed
         ServerUtil.killServer(server0);
         System.out.println("Killed primary");
         Thread.sleep(2000);
         // Step 12. Acknowledging the received messages will fail as failover to the backup server has occurred
         try {
            message0.acknowledge();
         } catch (JMSException e) {
            System.out.println("Got (the expected) exception while acknowledging message: " + e.getMessage());
         }
         // Step 13. Consume again the 2nd third of the messages again. Note that they are not considered as redelivered.
         for (int i = numMessages / 3; i < (numMessages / 3) * 2; i++) {
            message0 = (TextMessage) consumer.receive(5000);
            System.out.printf("Got message: %s (redelivered?: %s)\n", message0.getText(), message0.getJMSRedelivered());
         }
         // Step 14. Acknowledging them on the failed-over broker works fine
         message0.acknowledge();
         System.out.println("Acknowledged 2n third of messages");
         // Step 15. Restarting primary
         server0 = ServerUtil.startServer(args[0], ZookeeperSinglePairFailback.class.getSimpleName() + "-primary", 0, 10000);
         System.out.println("Started primary");
         // await fail-back to complete
         // NOTE(review): fixed sleep assumes fail-back finishes within 4s — timing-sensitive
         Thread.sleep(4000);
         // Step 16. Consuming the 3rd third of the messages. Note that they are not considered as redelivered.
         for (int i = (numMessages / 3) * 2; i < numMessages; i++) {
            message0 = (TextMessage) consumer.receive(5000);
            System.out.printf("Got message: %s (redelivered?: %s)\n", message0.getText(), message0.getJMSRedelivered());
         }
         message0.acknowledge();
         System.out.println("Acknowledged 3d third of messages");
      } finally {
         // Step 17. Be sure to close our resources!
         if (connection != null) {
            connection.close();
         }
         if (initialContext != null) {
            initialContext.close();
         }
         ServerUtil.killServer(server0);
         ServerUtil.killServer(server1);
         // Step 18. stop the ZK server
      }
   }
}

View File

@ -0,0 +1,90 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<configuration xmlns="urn:activemq" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:activemq /schema/artemis-configuration.xsd">
<core xmlns="urn:activemq:core">
<bindings-directory>./data/bindings</bindings-directory>
<journal-directory>./data/journal</journal-directory>
<large-messages-directory>./data/largemessages</large-messages-directory>
<paging-directory>./data/paging</paging-directory>
<cluster-user>exampleUser</cluster-user>
<cluster-password>secret</cluster-password>
<ha-policy>
<replication>
<primary>
<manager>
<properties>
<property key="connect-string" value="localhost:2181"/>
<property key="namespace" value="examples"/>
<property key="session-ms" value="18000"/>
</properties>
</manager>
</primary>
</replication>
</ha-policy>
<connectors>
<connector name="netty-connector">tcp://localhost:61616</connector>
<connector name="netty-backup-connector">tcp://localhost:61617</connector>
</connectors>
<!-- Acceptors -->
<acceptors>
<acceptor name="netty-acceptor">tcp://localhost:61616</acceptor>
</acceptors>
<cluster-connections>
<cluster-connection name="my-cluster">
<connector-ref>netty-connector</connector-ref>
<static-connectors>
<connector-ref>netty-backup-connector</connector-ref>
</static-connectors>
</cluster-connection>
</cluster-connections>
<!-- Other config -->
<security-settings>
<!--security for example queue-->
<security-setting match="exampleQueue">
<permission roles="guest" type="createDurableQueue"/>
<permission roles="guest" type="deleteDurableQueue"/>
<permission roles="guest" type="createNonDurableQueue"/>
<permission roles="guest" type="deleteNonDurableQueue"/>
<permission roles="guest" type="consume"/>
<permission roles="guest" type="send"/>
</security-setting>
</security-settings>
<addresses>
<address name="exampleQueue">
<anycast>
<queue name="exampleQueue"/>
</anycast>
</address>
</addresses>
</core>
</configuration>

View File

@ -0,0 +1,91 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<configuration xmlns="urn:activemq" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:activemq /schema/artemis-configuration.xsd">
<core xmlns="urn:activemq:core">
<bindings-directory>./data/bindings</bindings-directory>
<journal-directory>./data/journal</journal-directory>
<large-messages-directory>./data/largemessages</large-messages-directory>
<paging-directory>./data/paging</paging-directory>
<cluster-user>exampleUser</cluster-user>
<cluster-password>secret</cluster-password>
<ha-policy>
<replication>
<backup>
<manager>
<properties>
<property key="connect-string" value="localhost:2181"/>
<property key="namespace" value="examples"/>
<property key="session-ms" value="18000"/>
</properties>
</manager>
<allow-failback>true</allow-failback>
</backup>
</replication>
</ha-policy>
<!-- Connectors -->
<connectors>
<connector name="netty-live-connector">tcp://localhost:61616</connector>
<connector name="netty-connector">tcp://localhost:61617</connector>
</connectors>
<!-- Acceptors -->
<acceptors>
<acceptor name="netty-acceptor">tcp://localhost:61617</acceptor>
</acceptors>
<cluster-connections>
<cluster-connection name="my-cluster">
<connector-ref>netty-connector</connector-ref>
<static-connectors>
<connector-ref>netty-live-connector</connector-ref>
</static-connectors>
</cluster-connection>
</cluster-connections>
<!-- Other config -->
<security-settings>
<!--security for example queue-->
<security-setting match="exampleQueue">
<permission roles="guest" type="createDurableQueue"/>
<permission roles="guest" type="deleteDurableQueue"/>
<permission roles="guest" type="createNonDurableQueue"/>
<permission roles="guest" type="deleteNonDurableQueue"/>
<permission roles="guest" type="consume"/>
<permission roles="guest" type="send"/>
</security-setting>
</security-settings>
<addresses>
<address name="exampleQueue">
<anycast>
<queue name="exampleQueue"/>
</anycast>
</address>
</addresses>
</core>
</configuration>

View File

@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
java.naming.factory.initial=org.apache.activemq.artemis.jndi.ActiveMQInitialContextFactory
connectionFactory.ConnectionFactory=tcp://localhost:61616?ha=true&retryInterval=1000&retryIntervalMultiplier=1.0&reconnectAttempts=-1
queue.queue/exampleQueue=exampleQueue

31
pom.xml
View File

@ -64,6 +64,8 @@
<module>artemis-distribution</module>
<module>tests</module>
<module>artemis-features</module>
<module>artemis-quorum-api</module>
<module>artemis-quorum-ri</module>
</modules>
<name>ActiveMQ Artemis Parent</name>
@ -105,6 +107,9 @@
<mockito.version>3.11.2</mockito.version>
<jctools.version>2.1.2</jctools.version>
<netty.version>4.1.66.Final</netty.version>
<curator.version>5.1.0</curator.version>
<!-- While waiting https://issues.apache.org/jira/browse/CURATOR-595 fix -->
<zookeeper.version>3.6.3</zookeeper.version>
<!-- this is basically for tests -->
<netty-tcnative-version>2.0.40.Final</netty-tcnative-version>
@ -851,6 +856,32 @@
<artifactId>jakarta.security.auth.message-api</artifactId>
<version>${jakarta.security.auth.message-api.version}</version>
</dependency>
<!-- Curator Zookeeper RI -->
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>${curator.version}</version>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-framework</artifactId>
<version>${curator.version}</version>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-client</artifactId>
<version>${curator.version}</version>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>${zookeeper.version}</version>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper-jute</artifactId>
<version>${zookeeper.version}</version>
</dependency>
</dependencies>
</dependencyManagement>

View File

@ -44,9 +44,9 @@ public class ScaleDownFailoverTest extends ClusterTestBase {
public void setUp() throws Exception {
super.setUp();
stopCount = 0;
setupLiveServer(0, isFileStorage(), false, isNetty(), true);
setupLiveServer(1, isFileStorage(), false, isNetty(), true);
setupLiveServer(2, isFileStorage(), false, isNetty(), true);
setupLiveServer(0, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
setupLiveServer(1, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
setupLiveServer(2, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
ScaleDownConfiguration scaleDownConfiguration = new ScaleDownConfiguration();
ScaleDownConfiguration scaleDownConfiguration2 = new ScaleDownConfiguration();
scaleDownConfiguration2.setEnabled(false);

View File

@ -35,8 +35,8 @@ public class ScaleDownFailureTest extends ClusterTestBase {
@Before
public void setUp() throws Exception {
super.setUp();
setupLiveServer(0, isFileStorage(), false, isNetty(), true);
setupLiveServer(1, isFileStorage(), false, isNetty(), true);
setupLiveServer(0, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
setupLiveServer(1, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
if (isGrouped()) {
ScaleDownConfiguration scaleDownConfiguration = new ScaleDownConfiguration();
scaleDownConfiguration.setGroupName("bill");

View File

@ -51,6 +51,12 @@
<scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-quorum-ri</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.activemq.tests</groupId>
<artifactId>unit-tests</artifactId>

View File

@ -77,7 +77,6 @@ public class InfiniteRedeliveryTest extends ActiveMQTestBase {
Configuration backupConfig;
Configuration liveConfig;
NodeManager nodeManager;
protected TestableServer createTestableServer(Configuration config, NodeManager nodeManager) throws Exception {
boolean isBackup = config.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration || config.getHAPolicyConfiguration() instanceof SharedStoreSlavePolicyConfiguration;
@ -93,20 +92,25 @@ public class InfiniteRedeliveryTest extends ActiveMQTestBase {
backupConfig = createDefaultConfig(0, true);
liveConfig = createDefaultConfig(0, true);
ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null);
configureReplicationPair(backupConnector, backupAcceptor, liveConnector);
backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
nodeManager = new InVMNodeManager(true, backupConfig.getJournalLocation());
backupServer = createTestableServer(backupConfig, nodeManager);
backupServer = createTestableServer(backupConfig, new InVMNodeManager(true, backupConfig.getJournalLocation()));
liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(TransportConfigurationUtils.getNettyAcceptor(true, 0));
liveServer = createTestableServer(liveConfig, nodeManager);
liveServer = createTestableServer(liveConfig, new InVMNodeManager(false, liveConfig.getJournalLocation()));
}
protected void configureReplicationPair(TransportConfiguration backupConnector,
TransportConfiguration backupAcceptor,
TransportConfiguration liveConnector) {
ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
}

View File

@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.client;
import java.util.Collections;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
import org.apache.activemq.artemis.tests.util.ReplicatedBackupUtils;
import org.junit.Before;
/**
 * Variant of {@link InfiniteRedeliveryTest} that configures the replication pair
 * through the pluggable quorum-vote API, backed by the file-based
 * {@link FileBasedPrimitiveManager} instead of the legacy in-VM quorum.
 */
public class PluggableQuorumInfiniteRedeliveryTest extends InfiniteRedeliveryTest {

   // Shared manager configuration used by both live and backup brokers so they
   // coordinate through the same locks folder.
   private DistributedPrimitiveManagerConfiguration managerConfiguration;

   public PluggableQuorumInfiniteRedeliveryTest(String protocol, boolean useCLI) {
      super(protocol, useCLI);
   }

   @Before
   @Override
   public void setUp() throws Exception {
      super.setUp();
      // Each test run gets its own temporary "locks-folder" so parallel runs cannot
      // see each other's quorum state.
      this.managerConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(),
         Collections.singletonMap("locks-folder", temporaryFolder.newFolder("manager").toString()));
   }

   /**
    * Wires the live/backup pair using the pluggable-quorum replication policies
    * (instead of the classic replica/replicated pair configured by the superclass).
    * The same manager configuration is passed for both sides on purpose.
    */
   @Override
   protected void configureReplicationPair(TransportConfiguration backupConnector,
                                           TransportConfiguration backupAcceptor,
                                           TransportConfiguration liveConnector) {
      ReplicatedBackupUtils.configurePluggableQuorumReplicationPair(backupConfig, backupConnector, backupAcceptor,
                                                                    liveConfig, liveConnector, null,
                                                                    managerConfiguration, managerConfiguration);
      // Unlimited saved replicated journals plus fail-back mirrors the legacy test's
      // ReplicaPolicyConfiguration settings.
      ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration())
         .setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true);
   }
}

View File

@ -17,6 +17,7 @@
package org.apache.activemq.artemis.tests.integration.cluster.distribution;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.URI;
@ -56,9 +57,12 @@ import org.apache.activemq.artemis.core.client.impl.TopologyMemberImpl;
import org.apache.activemq.artemis.core.config.ClusterConnectionConfiguration;
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.LiveOnlyPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.SharedStoreMasterPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration;
import org.apache.activemq.artemis.core.postoffice.Binding;
@ -85,6 +89,7 @@ import org.apache.activemq.artemis.core.server.group.GroupingHandler;
import org.apache.activemq.artemis.core.server.group.impl.GroupingHandlerConfiguration;
import org.apache.activemq.artemis.core.server.impl.AddressInfo;
import org.apache.activemq.artemis.core.server.impl.InVMNodeManager;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
import org.apache.activemq.artemis.utils.PortCheckRule;
import org.jboss.logging.Logger;
@ -134,6 +139,21 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
return true;
}
private DistributedPrimitiveManagerConfiguration pluggableQuorumConfiguration = null;
private DistributedPrimitiveManagerConfiguration getOrCreatePluggableQuorumConfiguration() {
if (pluggableQuorumConfiguration != null) {
return pluggableQuorumConfiguration;
}
try {
pluggableQuorumConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), Collections.singletonMap("locks-folder", temporaryFolder.newFolder("manager").toString()));
} catch (IOException ioException) {
log.error(ioException);
return null;
}
return pluggableQuorumConfiguration;
}
@Override
@Before
public void setUp() throws Exception {
@ -159,11 +179,19 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
}
public enum HAType {
SharedStore, SharedNothingReplication, PluggableQuorumReplication
}
protected HAType haType() {
return HAType.SharedNothingReplication;
}
/**
* Whether the servers share the storage or not.
*/
protected boolean isSharedStore() {
return false;
protected final boolean isSharedStore() {
return HAType.SharedStore.equals(haType());
}
@Override
@ -1481,14 +1509,14 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
}
protected void setupServer(final int node, final boolean fileStorage, final boolean netty) throws Exception {
setupLiveServer(node, fileStorage, false, netty, false);
setupLiveServer(node, fileStorage, HAType.SharedNothingReplication, netty, false);
}
protected void setupLiveServer(final int node,
final boolean fileStorage,
final boolean netty,
boolean isLive) throws Exception {
setupLiveServer(node, fileStorage, false, netty, isLive);
setupLiveServer(node, fileStorage, HAType.SharedNothingReplication, netty, isLive);
}
protected boolean isResolveProtocols() {
@ -1497,27 +1525,26 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
protected void setupLiveServer(final int node,
final boolean fileStorage,
final boolean sharedStorage,
final HAType haType,
final boolean netty,
boolean liveOnly) throws Exception {
if (servers[node] != null) {
throw new IllegalArgumentException("Already a server at node " + node);
}
HAPolicyConfiguration haPolicyConfiguration = null;
final HAPolicyConfiguration haPolicyConfiguration;
if (liveOnly) {
haPolicyConfiguration = new LiveOnlyPolicyConfiguration();
} else {
if (sharedStorage)
haPolicyConfiguration = new SharedStoreMasterPolicyConfiguration();
else
haPolicyConfiguration = new ReplicatedPolicyConfiguration();
haPolicyConfiguration = haPolicyLiveConfiguration(haType);
}
Configuration configuration = createBasicConfig(node).setJournalMaxIO_AIO(1000).setThreadPoolMaxSize(10).clearAcceptorConfigurations().addAcceptorConfiguration(createTransportConfiguration(netty, true, generateParams(node, netty))).setHAPolicyConfiguration(haPolicyConfiguration).setResolveProtocols(isResolveProtocols());
ActiveMQServer server;
final boolean sharedStorage = HAType.SharedStore.equals(haType);
if (fileStorage) {
if (sharedStorage) {
server = createInVMFailoverServer(true, configuration, nodeManagers[node], node);
@ -1538,6 +1565,20 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
servers[node] = addServer(server);
}
private HAPolicyConfiguration haPolicyLiveConfiguration(HAType haType) {
switch (haType) {
case SharedStore:
return new SharedStoreMasterPolicyConfiguration();
case SharedNothingReplication:
return new ReplicatedPolicyConfiguration();
case PluggableQuorumReplication:
return ReplicationPrimaryPolicyConfiguration.withDefault()
.setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration());
default:
throw new AssertionError("Unsupported haType = " + haType);
}
}
/**
* Server lacks a {@link ClusterConnectionConfiguration} necessary for the remote (replicating)
* backup case.
@ -1549,14 +1590,14 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
* @param node
* @param liveNode
* @param fileStorage
* @param sharedStorage
* @param haType
* @param netty
* @throws Exception
*/
protected void setupBackupServer(final int node,
final int liveNode,
final boolean fileStorage,
final boolean sharedStorage,
final HAType haType,
final boolean netty) throws Exception {
if (servers[node] != null) {
throw new IllegalArgumentException("Already a server at node " + node);
@ -1566,7 +1607,9 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
TransportConfiguration backupConfig = createTransportConfiguration(netty, false, generateParams(node, netty));
TransportConfiguration acceptorConfig = createTransportConfiguration(netty, true, generateParams(node, netty));
Configuration configuration = createBasicConfig(sharedStorage ? liveNode : node).clearAcceptorConfigurations().addAcceptorConfiguration(acceptorConfig).addConnectorConfiguration(liveConfig.getName(), liveConfig).addConnectorConfiguration(backupConfig.getName(), backupConfig).setHAPolicyConfiguration(sharedStorage ? new SharedStoreSlavePolicyConfiguration() : new ReplicaPolicyConfiguration());
final boolean sharedStorage = HAType.SharedStore.equals(haType);
Configuration configuration = createBasicConfig(sharedStorage ? liveNode : node).clearAcceptorConfigurations().addAcceptorConfiguration(acceptorConfig).addConnectorConfiguration(liveConfig.getName(), liveConfig).addConnectorConfiguration(backupConfig.getName(), backupConfig).setHAPolicyConfiguration(haPolicyBackupConfiguration(haType));
ActiveMQServer server;
@ -1580,6 +1623,21 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
servers[node] = addServer(server);
}
private HAPolicyConfiguration haPolicyBackupConfiguration(HAType haType) {
switch (haType) {
case SharedStore:
return new SharedStoreSlavePolicyConfiguration();
case SharedNothingReplication:
return new ReplicaPolicyConfiguration();
case PluggableQuorumReplication:
return ReplicationBackupPolicyConfiguration.withDefault()
.setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration());
default:
throw new AssertionError("Unsupported ha type = " + haType);
}
}
protected void setupLiveServerWithDiscovery(final int node,
final String groupAddress,
final int port,

View File

@ -87,14 +87,14 @@ public class ClusterWithBackupTest extends ClusterTestBase {
protected void setupServers() throws Exception {
// The backups
setupBackupServer(0, 3, isFileStorage(), true, isNetty());
setupBackupServer(1, 4, isFileStorage(), true, isNetty());
setupBackupServer(2, 5, isFileStorage(), true, isNetty());
setupBackupServer(0, 3, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(1, 4, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(2, 5, isFileStorage(), HAType.SharedStore, isNetty());
// The lives
setupLiveServer(3, isFileStorage(), true, isNetty(), false);
setupLiveServer(4, isFileStorage(), true, isNetty(), false);
setupLiveServer(5, isFileStorage(), true, isNetty(), false);
setupLiveServer(3, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(4, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(5, isFileStorage(), HAType.SharedStore, isNetty(), false);
}
}

View File

@ -46,14 +46,14 @@ public class SimpleSymmetricClusterTest extends ClusterTestBase {
@Test
public void testSimpleWithBackup() throws Exception {
// The backups
setupBackupServer(0, 3, isFileStorage(), true, isNetty());
setupBackupServer(1, 4, isFileStorage(), true, isNetty());
setupBackupServer(2, 5, isFileStorage(), true, isNetty());
setupBackupServer(0, 3, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(1, 4, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(2, 5, isFileStorage(), HAType.SharedStore, isNetty());
// The lives
setupLiveServer(3, isFileStorage(), true, isNetty(), false);
setupLiveServer(4, isFileStorage(), true, isNetty(), false);
setupLiveServer(5, isFileStorage(), true, isNetty(), false);
setupLiveServer(3, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(4, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(5, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupClusterConnection("cluster0", "queues", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 3, 4, 5);

View File

@ -453,18 +453,18 @@ public class SymmetricClusterWithBackupTest extends SymmetricClusterTest {
@Override
protected void setupServers() throws Exception {
// The backups
setupBackupServer(5, 0, isFileStorage(), true, isNetty());
setupBackupServer(6, 1, isFileStorage(), true, isNetty());
setupBackupServer(7, 2, isFileStorage(), true, isNetty());
setupBackupServer(8, 3, isFileStorage(), true, isNetty());
setupBackupServer(9, 4, isFileStorage(), true, isNetty());
setupBackupServer(5, 0, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(6, 1, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(7, 2, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(8, 3, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(9, 4, isFileStorage(), HAType.SharedStore, isNetty());
// The lives
setupLiveServer(0, isFileStorage(), true, isNetty(), false);
setupLiveServer(1, isFileStorage(), true, isNetty(), false);
setupLiveServer(2, isFileStorage(), true, isNetty(), false);
setupLiveServer(3, isFileStorage(), true, isNetty(), false);
setupLiveServer(4, isFileStorage(), true, isNetty(), false);
setupLiveServer(0, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(1, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(2, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(3, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(4, isFileStorage(), HAType.SharedStore, isNetty(), false);
}
@Override

View File

@ -60,6 +60,8 @@ import org.apache.activemq.artemis.core.server.cluster.ha.BackupPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicaPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicatedPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreMasterPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreSlavePolicy;
import org.apache.activemq.artemis.core.server.files.FileMoveManager;
@ -657,6 +659,8 @@ public class FailoverTest extends FailoverTestBase {
backupServer.getServer().fail(true);
decrementActivationSequenceForForceRestartOf(liveServer);
liveServer.start();
consumer.close();
@ -786,7 +790,7 @@ public class FailoverTest extends FailoverTestBase {
((ReplicaPolicy) haPolicy).setMaxSavedReplicatedJournalsSize(1);
}
simpleFailover(haPolicy instanceof ReplicaPolicy, doFailBack);
simpleFailover(haPolicy instanceof ReplicaPolicy || haPolicy instanceof ReplicationBackupPolicy, doFailBack);
}
@Test(timeout = 120000)
@ -816,9 +820,12 @@ public class FailoverTest extends FailoverTestBase {
Thread.sleep(100);
Assert.assertFalse("backup is not running", backupServer.isStarted());
Assert.assertFalse("must NOT be a backup", liveServer.getServer().getHAPolicy() instanceof BackupPolicy);
final boolean isBackup = liveServer.getServer().getHAPolicy() instanceof BackupPolicy ||
liveServer.getServer().getHAPolicy() instanceof ReplicationBackupPolicy;
Assert.assertFalse("must NOT be a backup", isBackup);
adaptLiveConfigForReplicatedFailBack(liveServer);
beforeRestart(liveServer);
decrementActivationSequenceForForceRestartOf(liveServer);
liveServer.start();
Assert.assertTrue("live initialized...", liveServer.getServer().waitForActivation(15, TimeUnit.SECONDS));
@ -827,7 +834,8 @@ public class FailoverTest extends FailoverTestBase {
ClientSession session2 = createSession(sf, false, false);
session2.start();
ClientConsumer consumer2 = session2.createConsumer(FailoverTestBase.ADDRESS);
boolean replication = liveServer.getServer().getHAPolicy() instanceof ReplicatedPolicy;
final boolean replication = liveServer.getServer().getHAPolicy() instanceof ReplicatedPolicy ||
liveServer.getServer().getHAPolicy() instanceof ReplicationPrimaryPolicy;
if (replication)
receiveMessages(consumer2, 0, NUM_MESSAGES, true);
assertNoMoreMessages(consumer2);
@ -838,7 +846,7 @@ public class FailoverTest extends FailoverTestBase {
public void testSimpleFailover() throws Exception {
HAPolicy haPolicy = backupServer.getServer().getHAPolicy();
simpleFailover(haPolicy instanceof ReplicaPolicy, false);
simpleFailover(haPolicy instanceof ReplicaPolicy || haPolicy instanceof ReplicationBackupPolicy, false);
}
@Test(timeout = 120000)
@ -926,12 +934,13 @@ public class FailoverTest extends FailoverTestBase {
while (!backupServer.isStarted() && i++ < 100) {
Thread.sleep(100);
}
liveServer.getServer().waitForActivation(5, TimeUnit.SECONDS);
backupServer.getServer().waitForActivation(5, TimeUnit.SECONDS);
Assert.assertTrue(backupServer.isStarted());
if (isReplicated) {
FileMoveManager moveManager = new FileMoveManager(backupServer.getServer().getConfiguration().getJournalLocation(), 0);
Assert.assertEquals(1, moveManager.getNumberOfFolders());
// backup has not had a chance to restart as a backup and cleanup
Wait.assertTrue(() -> moveManager.getNumberOfFolders() <= 2);
}
} else {
backupServer.stop();
@ -2422,6 +2431,10 @@ public class FailoverTest extends FailoverTestBase {
// no-op
}
protected void decrementActivationSequenceForForceRestartOf(TestableServer liveServer) throws Exception {
// no-op
}
protected ClientSession sendAndConsume(final ClientSessionFactory sf1, final boolean createQueue) throws Exception {
ClientSession session = createSession(sf1, false, true, true);

View File

@ -19,6 +19,7 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover;
import java.io.IOException;
import java.net.ServerSocket;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
@ -36,15 +37,18 @@ import org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal
import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal;
import org.apache.activemq.artemis.core.config.ClusterConnectionConfiguration;
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.SharedStoreMasterPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration;
import org.apache.activemq.artemis.core.remoting.impl.invm.InVMConnector;
import org.apache.activemq.artemis.core.remoting.impl.invm.InVMRegistry;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicatedPolicy;
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.InVMNodeManager;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
import org.apache.activemq.artemis.tests.integration.cluster.util.SameProcessActiveMQServer;
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
@ -79,6 +83,10 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
protected NodeManager nodeManager;
protected NodeManager backupNodeManager;
protected DistributedPrimitiveManagerConfiguration managerConfiguration;
protected boolean startBackupServer = true;
@Override
@ -164,6 +172,10 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
return new InVMNodeManager(false);
}
protected NodeManager createNodeManager(Configuration configuration) throws Exception {
return new InVMNodeManager(false, configuration.getNodeManagerLockLocation());
}
protected void createConfigs() throws Exception {
nodeManager = createNodeManager();
TransportConfiguration liveConnector = getConnectorTransportConfiguration(true);
@ -202,13 +214,14 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false);
setupHAPolicyConfiguration();
nodeManager = createReplicatedBackupNodeManager(backupConfig);
backupNodeManager = createReplicatedBackupNodeManager(backupConfig);
backupServer = createTestableServer(backupConfig);
backupServer = createTestableServer(backupConfig, backupNodeManager);
liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true));
liveServer = createTestableServer(liveConfig);
nodeManager = createNodeManager(liveConfig);
liveServer = createTestableServer(liveConfig, nodeManager);
if (supportsRetention()) {
liveServer.getServer().getConfiguration().setJournalRetentionDirectory(getJournalDir(0, false) + "_retention");
@ -216,7 +229,35 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
}
}
protected void createPluggableReplicatedConfigs() throws Exception {
final TransportConfiguration liveConnector = getConnectorTransportConfiguration(true);
final TransportConfiguration backupConnector = getConnectorTransportConfiguration(false);
final TransportConfiguration backupAcceptor = getAcceptorTransportConfiguration(false);
backupConfig = createDefaultInVMConfig();
liveConfig = createDefaultInVMConfig();
managerConfiguration =
new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(),
Collections.singletonMap("locks-folder", temporaryFolder.newFolder("manager").toString()));
ReplicatedBackupUtils.configurePluggableQuorumReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null, managerConfiguration, managerConfiguration);
backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false);
setupHAPolicyConfiguration();
backupNodeManager = createReplicatedBackupNodeManager(backupConfig);
backupServer = createTestableServer(backupConfig, backupNodeManager);
liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true));
nodeManager = createNodeManager(liveConfig);
liveServer = createTestableServer(liveConfig, nodeManager);
}
protected void setupHAPolicyConfiguration() {
Assert.assertTrue(backupConfig.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
}
@ -233,8 +274,11 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
configuration.getConnectorConfigurations().put(backupConnector.getName(), backupConnector);
return;
}
ReplicatedPolicy haPolicy = (ReplicatedPolicy) server.getServer().getHAPolicy();
haPolicy.setCheckForLiveServer(true);
HAPolicy policy = server.getServer().getHAPolicy();
if (policy instanceof ReplicatedPolicy) {
((ReplicatedPolicy) policy).setCheckForLiveServer(true);
}
}
@Override
@ -253,6 +297,7 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
nodeManager = null;
backupNodeManager = null;
try {
ServerSocket serverSocket = new ServerSocket(61616);
serverSocket.close();

View File

@ -19,8 +19,8 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover;
public class GroupingFailoverReplicationTest extends GroupingFailoverTestBase {
@Override
protected boolean isSharedStore() {
return false;
protected HAType haType() {
return HAType.SharedNothingReplication;
}
}

View File

@ -19,7 +19,7 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover;
public class GroupingFailoverSharedServerTest extends GroupingFailoverTestBase {
@Override
protected boolean isSharedStore() {
return true;
protected HAType haType() {
return HAType.SharedStore;
}
}

View File

@ -26,22 +26,26 @@ import org.apache.activemq.artemis.api.core.client.ServerLocator;
import org.apache.activemq.artemis.core.client.impl.TopologyMemberImpl;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.server.cluster.impl.MessageLoadBalancingType;
import org.apache.activemq.artemis.core.server.group.impl.GroupingHandlerConfiguration;
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
import org.apache.activemq.artemis.tests.integration.cluster.distribution.ClusterTestBase;
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
import org.apache.activemq.artemis.utils.Wait;
import org.junit.Test;
public abstract class GroupingFailoverTestBase extends ClusterTestBase {
@Test
public void testGroupingLocalHandlerFails() throws Exception {
setupBackupServer(2, 0, isFileStorage(), isSharedStore(), isNetty());
setupBackupServer(2, 0, isFileStorage(), haType(), isNetty());
setupLiveServer(0, isFileStorage(), isSharedStore(), isNetty(), false);
setupLiveServer(0, isFileStorage(), haType(), isNetty(), false);
setupLiveServer(1, isFileStorage(), isSharedStore(), isNetty(), false);
setupLiveServer(1, isFileStorage(), haType(), isNetty(), false);
setupClusterConnection("cluster0", "queues", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 0, 1);
@ -54,10 +58,18 @@ public abstract class GroupingFailoverTestBase extends ClusterTestBase {
setUpGroupHandler(GroupingHandlerConfiguration.TYPE.REMOTE, 1);
setUpGroupHandler(GroupingHandlerConfiguration.TYPE.LOCAL, 2);
if (!isSharedStore()) {
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
switch (haType()) {
case SharedNothingReplication:
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
break;
case PluggableQuorumReplication:
((ReplicationPrimaryPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
((ReplicationPrimaryPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
((ReplicationBackupPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
break;
}
startServers(0, 1, 2);
@ -129,11 +141,11 @@ public abstract class GroupingFailoverTestBase extends ClusterTestBase {
@Test
public void testGroupingLocalHandlerFailsMultipleGroups() throws Exception {
setupBackupServer(2, 0, isFileStorage(), isSharedStore(), isNetty());
setupBackupServer(2, 0, isFileStorage(), haType(), isNetty());
setupLiveServer(0, isFileStorage(), isSharedStore(), isNetty(), false);
setupLiveServer(0, isFileStorage(), haType(), isNetty(), false);
setupLiveServer(1, isFileStorage(), isSharedStore(), isNetty(), false);
setupLiveServer(1, isFileStorage(), haType(), isNetty(), false);
setupClusterConnection("cluster0", "queues", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 0, 1);
@ -147,10 +159,18 @@ public abstract class GroupingFailoverTestBase extends ClusterTestBase {
setUpGroupHandler(GroupingHandlerConfiguration.TYPE.LOCAL, 2);
if (!isSharedStore()) {
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
switch (haType()) {
case SharedNothingReplication:
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
break;
case PluggableQuorumReplication:
((ReplicationPrimaryPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
((ReplicationPrimaryPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
((ReplicationBackupPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
break;
}
startServers(0, 1, 2);
@ -187,9 +207,17 @@ public abstract class GroupingFailoverTestBase extends ClusterTestBase {
verifyReceiveAllWithGroupIDRoundRobin(0, 30, 0, 1);
if (!isSharedStore()) {
SharedNothingBackupActivation backupActivation = (SharedNothingBackupActivation) servers[2].getActivation();
assertTrue(backupActivation.waitForBackupSync(10, TimeUnit.SECONDS));
switch (haType()) {
case SharedNothingReplication: {
SharedNothingBackupActivation backupActivation = (SharedNothingBackupActivation) servers[2].getActivation();
assertTrue(backupActivation.waitForBackupSync(10, TimeUnit.SECONDS));
}
break;
case PluggableQuorumReplication: {
ReplicationBackupActivation backupActivation = (ReplicationBackupActivation) servers[2].getActivation();
Wait.assertTrue(backupActivation::isReplicaSync, TimeUnit.SECONDS.toMillis(10));
}
break;
}
closeSessionFactory(0);

View File

@ -49,14 +49,14 @@ public class LiveVoteOnBackupFailureClusterTest extends ClusterWithBackupFailove
@Override
protected void setupServers() throws Exception {
// The backups
setupBackupServer(3, 0, isFileStorage(), isSharedStorage(), isNetty());
setupBackupServer(4, 1, isFileStorage(), isSharedStorage(), isNetty());
setupBackupServer(5, 2, isFileStorage(), isSharedStorage(), isNetty());
setupBackupServer(3, 0, isFileStorage(), haType(), isNetty());
setupBackupServer(4, 1, isFileStorage(), haType(), isNetty());
setupBackupServer(5, 2, isFileStorage(), haType(), isNetty());
// The lives
setupLiveServer(0, isFileStorage(), isSharedStorage(), isNetty(), false);
setupLiveServer(1, isFileStorage(), isSharedStorage(), isNetty(), false);
setupLiveServer(2, isFileStorage(), isSharedStorage(), isNetty(), false);
setupLiveServer(0, isFileStorage(), haType(), isNetty(), false);
setupLiveServer(1, isFileStorage(), haType(), isNetty(), false);
setupLiveServer(2, isFileStorage(), haType(), isNetty(), false);
//we need to know who is connected to who
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group0");
@ -71,9 +71,9 @@ public class LiveVoteOnBackupFailureClusterTest extends ClusterWithBackupFailove
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setVoteOnReplicationFailure(true);
((ReplicatedPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setVoteOnReplicationFailure(true);
}
protected boolean isSharedStorage() {
return false;
@Override
protected HAType haType() {
return HAType.SharedNothingReplication;
}
@Test

View File

@ -16,7 +16,9 @@
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.activemq.artemis.api.core.SimpleString;
@ -27,14 +29,19 @@ import org.apache.activemq.artemis.api.core.client.ClientSessionFactory;
import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal;
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.SharedStoreMasterPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.core.server.Queue;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
import org.apache.activemq.artemis.tests.integration.cluster.distribution.ClusterTestBase;
import org.apache.activemq.artemis.tests.util.Wait;
import org.apache.activemq.artemis.tests.integration.cluster.util.SameProcessActiveMQServer;
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
@ -43,6 +50,21 @@ import org.apache.activemq.artemis.tests.util.TransportConfigurationUtils;
import org.junit.Before;
public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
private DistributedPrimitiveManagerConfiguration pluggableQuorumConfiguration = null;
private DistributedPrimitiveManagerConfiguration getOrCreatePluggableQuorumConfiguration() {
if (pluggableQuorumConfiguration != null) {
return pluggableQuorumConfiguration;
}
try {
pluggableQuorumConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), Collections.singletonMap("locks-folder", temporaryFolder.newFolder("manager").toString()));
} catch (IOException ioException) {
return null;
}
return pluggableQuorumConfiguration;
}
// Constants -----------------------------------------------------
// TODO: find a better solution for this
@ -67,7 +89,15 @@ public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
public abstract boolean isNetty();
public abstract boolean isSharedStore();
public enum HAType {
SharedStore, SharedNothingReplication, PluggableQuorumReplication
}
public abstract HAType haType();
protected final boolean isSharedStore() {
return ClusterTestBase.HAType.SharedStore.equals(haType());
}
public abstract String getNodeGroupName();
@ -82,14 +112,22 @@ public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
for (int i = 0; i < getLiveServerCount(); i++) {
HAPolicyConfiguration haPolicyConfiguration = null;
switch (haType()) {
if (isSharedStore()) {
haPolicyConfiguration = new SharedStoreMasterPolicyConfiguration();
} else {
haPolicyConfiguration = new ReplicatedPolicyConfiguration();
if (getNodeGroupName() != null) {
((ReplicatedPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i);
}
case SharedStore:
haPolicyConfiguration = new SharedStoreMasterPolicyConfiguration();
break;
case SharedNothingReplication:
haPolicyConfiguration = new ReplicatedPolicyConfiguration();
if (getNodeGroupName() != null) {
((ReplicatedPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i);
}
break;
case PluggableQuorumReplication:
haPolicyConfiguration = ReplicationPrimaryPolicyConfiguration.withDefault()
.setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration())
.setGroupName(getNodeGroupName() != null ? (getNodeGroupName() + "-" + i) : null);
break;
}
Configuration configuration = createDefaultConfig(isNetty()).clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true, i)).setHAPolicyConfiguration(haPolicyConfiguration);
@ -126,13 +164,22 @@ public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
for (int i = 0; i < getBackupServerCount(); i++) {
HAPolicyConfiguration haPolicyConfiguration = null;
if (isSharedStore()) {
haPolicyConfiguration = new SharedStoreSlavePolicyConfiguration();
} else {
haPolicyConfiguration = new ReplicaPolicyConfiguration();
if (getNodeGroupName() != null) {
((ReplicaPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i);
}
switch (haType()) {
case SharedStore:
haPolicyConfiguration = new SharedStoreSlavePolicyConfiguration();
break;
case SharedNothingReplication:
haPolicyConfiguration = new ReplicaPolicyConfiguration();
if (getNodeGroupName() != null) {
((ReplicaPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i);
}
break;
case PluggableQuorumReplication:
haPolicyConfiguration = ReplicationBackupPolicyConfiguration.withDefault()
.setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration())
.setGroupName(getNodeGroupName() != null ? (getNodeGroupName() + "-" + i) : null);
break;
}
Configuration configuration = createDefaultConfig(isNetty()).clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(false, i)).setHAPolicyConfiguration(haPolicyConfiguration);
@ -224,12 +271,14 @@ public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
return addClientSession(sf.createSession(xa, autoCommitSends, autoCommitAcks));
}
protected void waitForDistribution(SimpleString address, ActiveMQServer server, int messageCount) throws Exception {
protected boolean waitForDistribution(SimpleString address, ActiveMQServer server, int messageCount) throws Exception {
ActiveMQServerLogger.LOGGER.debug("waiting for distribution of messages on server " + server);
Queue q = (Queue) server.getPostOffice().getBinding(address).getBindable();
Wait.waitFor(() -> getMessageCount(q) >= messageCount);
return Wait.waitFor(() -> {
return getMessageCount(q) >= messageCount;
});
}
}

View File

@ -103,7 +103,7 @@ public class NettyReplicationStopTest extends FailoverTestBase {
final int numMessages = 10;
ReplicationEndpoint endpoint = backupServer.getServer().getReplicationEndpoint();
ReplicationEndpoint endpoint = getReplicationEndpoint(backupServer.getServer());
endpoint.pause();

View File

@ -124,14 +124,14 @@ public class NetworkIsolationTest extends FailoverTestBase {
liveServer.start();
for (int i = 0; i < 1000 && backupServer.getServer().getReplicationEndpoint() != null && !backupServer.getServer().getReplicationEndpoint().isStarted(); i++) {
for (int i = 0; i < 1000 && getReplicationEndpoint(backupServer.getServer()) != null && !getReplicationEndpoint(backupServer.getServer()).isStarted(); i++) {
Thread.sleep(10);
}
backupServer.getServer().getNetworkHealthCheck().clearAddresses();
// This will make sure the backup got synchronized after the network was activated again
Wait.assertTrue(() -> backupServer.getServer().getReplicationEndpoint().isStarted());
Assert.assertTrue(getReplicationEndpoint(backupServer.getServer()).isStarted());
} finally {
AssertionLoggerHandler.stopCapture();
}

View File

@ -17,12 +17,10 @@
package org.apache.activemq.artemis.tests.integration.cluster.failover;
import java.io.IOException;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.api.core.Interceptor;
import org.apache.activemq.artemis.api.core.QueueConfiguration;
import org.apache.activemq.artemis.api.core.SimpleString;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
@ -34,16 +32,18 @@ import org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration;
import org.apache.activemq.artemis.core.protocol.core.Packet;
import org.apache.activemq.artemis.core.protocol.core.impl.PacketImpl;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.core.server.impl.Activation;
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.InVMNodeManager;
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
import org.apache.activemq.artemis.tests.util.Wait;
import org.apache.activemq.artemis.logs.AssertionLoggerHandler;
import org.apache.activemq.artemis.spi.core.protocol.RemotingConnection;
import org.apache.activemq.artemis.tests.integration.cluster.util.SameProcessActiveMQServer;
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
@ -77,7 +77,9 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
}
protected TestableServer createTestableServer(Configuration config, NodeManager nodeManager) throws Exception {
boolean isBackup = config.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration || config.getHAPolicyConfiguration() instanceof SharedStoreSlavePolicyConfiguration;
boolean isBackup = config.getHAPolicyConfiguration() instanceof ReplicationBackupPolicyConfiguration ||
config.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration ||
config.getHAPolicyConfiguration() instanceof SharedStoreSlavePolicyConfiguration;
return new SameProcessActiveMQServer(createInVMFailoverServer(true, config, nodeManager, isBackup ? 2 : 1));
}
@ -119,6 +121,19 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
liveServer.crash(true, true, sessions);
}
protected void configureReplicationPair(Configuration backupConfig,
Configuration liveConfig,
TransportConfiguration backupConnector,
TransportConfiguration backupAcceptor,
TransportConfiguration liveConnector) throws IOException {
ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null);
((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000);
((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
}
@Test//(timeout = 120000)
public void testFailbackTimeout() throws Exception {
AssertionLoggerHandler.startCapture();
@ -134,29 +149,22 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
Configuration backupConfig = createDefaultInVMConfig();
Configuration liveConfig = createDefaultInVMConfig();
ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null);
((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000);
configureReplicationPair(backupConfig, liveConfig, backupConnector, backupAcceptor, liveConnector);
backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).
setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false);
liveConfig.setBindingsDirectory(getBindingsDir(0, false)).setJournalDirectory(getJournalDir(0, false)).
setPagingDirectory(getPageDir(0, false)).setLargeMessagesDirectory(getLargeMessagesDir(0, false)).setSecurityEnabled(false);
((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
NodeManager replicatedBackupNodeManager = createReplicatedBackupNodeManager(backupConfig);
NodeManager nodeManager = createReplicatedBackupNodeManager(backupConfig);
backupServer = createTestableServer(backupConfig, nodeManager);
backupServer = createTestableServer(backupConfig, replicatedBackupNodeManager);
liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true));
NodeManager nodeManager = createReplicatedBackupNodeManager(liveConfig);
liveServer = createTestableServer(liveConfig, nodeManager);
AtomicBoolean ignoreIntercept = new AtomicBoolean(false);
final TestableServer theBackup = backupServer;
liveServer.start();
@ -174,23 +182,30 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
Wait.assertTrue(backupServer.getServer()::isActive);
ignoreIntercept.set(true);
((ActiveMQServerImpl) backupServer.getServer()).setAfterActivationCreated(new Runnable() {
@Override
public void run() {
//theBackup.getServer().getActivation()
SharedNothingBackupActivation activation = (SharedNothingBackupActivation) theBackup.getServer().getActivation();
activation.getReplicationEndpoint().addOutgoingInterceptorForReplication(new Interceptor() {
@Override
public boolean intercept(Packet packet, RemotingConnection connection) throws ActiveMQException {
if (ignoreIntercept.get() && packet.getType() == PacketImpl.REPLICATION_RESPONSE_V2) {
final Activation backupActivation = theBackup.getServer().getActivation();
if (backupActivation instanceof SharedNothingBackupActivation) {
SharedNothingBackupActivation activation = (SharedNothingBackupActivation) backupActivation;
ReplicationEndpoint repEnd = activation.getReplicationEndpoint();
repEnd.addOutgoingInterceptorForReplication((packet, connection) -> {
if (packet.getType() == PacketImpl.REPLICATION_RESPONSE_V2) {
return false;
}
return true;
}
});
});
} else if (backupActivation instanceof ReplicationBackupActivation) {
ReplicationBackupActivation activation = (ReplicationBackupActivation) backupActivation;
activation.spyReplicationEndpointCreation(replicationEndpoint -> {
replicationEndpoint.addOutgoingInterceptorForReplication((packet, connection) -> {
if (packet.getType() == PacketImpl.REPLICATION_RESPONSE_V2) {
return false;
}
return true;
});
});
}
}
});
@ -198,7 +213,9 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
Assert.assertTrue(Wait.waitFor(() -> AssertionLoggerHandler.findText("AMQ229114")));
Wait.assertFalse(liveServer.getServer()::isStarted);
if (expectLiveSuicide()) {
Wait.assertFalse(liveServer.getServer()::isStarted);
}
} finally {
if (sf != null) {
@ -218,4 +235,8 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
}
}
protected boolean expectLiveSuicide() {
return true;
}
}

View File

@ -178,9 +178,9 @@ public class ReplicatedDistributionTest extends ClusterTestBase {
public void setUp() throws Exception {
super.setUp();
setupLiveServer(1, true, isSharedStore(), true, false);
setupLiveServer(3, true, isSharedStore(), true, false);
setupBackupServer(2, 3, true, isSharedStore(), true);
setupLiveServer(1, true, haType(), true, false);
setupLiveServer(3, true, haType(), true, false);
setupBackupServer(2, 3, true, haType(), true);
final String address = ReplicatedDistributionTest.ADDRESS.toString();
// notice the abuse of the method call, '3' is not a backup for '1'
@ -210,7 +210,7 @@ public class ReplicatedDistributionTest extends ClusterTestBase {
}
@Override
protected boolean isSharedStore() {
return false;
protected HAType haType() {
return HAType.SharedNothingReplication;
}
}

View File

@ -29,6 +29,7 @@ import org.apache.activemq.artemis.api.core.client.ClientSessionFactory;
import org.apache.activemq.artemis.api.core.client.FailoverEventType;
import org.apache.activemq.artemis.api.core.client.ServerLocator;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
import org.apache.activemq.artemis.tests.util.Wait;
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
@ -51,8 +52,16 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated
@Override
@Test
public void testStartLiveFirst() throws Exception {
((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
switch (haType()) {
case SharedNothingReplication:
((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
break;
case PluggableQuorumReplication:
((ReplicationBackupPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
((ReplicationBackupPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
break;
}
startServers(liveServers);
backupServers.get(0).start();
@ -66,7 +75,10 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated
sendCrashReceive();
Wait.assertTrue(backupServers.get(0)::isActive, 5000, 10);
Wait.assertTrue(backupServers.get(1)::isActive, 5000, 10);
waitForTopology(backupServers.get(0).getServer(), liveServers.size(), 2);
waitForTopology(backupServers.get(1).getServer(), liveServers.size(), 2);
sendCrashBackupReceive();
}
@ -85,8 +97,17 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated
@Override
@Test
public void testStartBackupFirst() throws Exception {
((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
switch (haType()) {
case SharedNothingReplication:
((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
break;
case PluggableQuorumReplication:
((ReplicationBackupPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
((ReplicationBackupPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
break;
}
startServers(backupServers);
startServers(liveServers);
@ -97,6 +118,14 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated
}
protected void sendCrashBackupReceive() throws Exception {
//make sure bindings are ready before sending messages b/c we verify strict load balancing in waitForDistribution
this.waitForBindings( backupServers.get(0).getServer(), ADDRESS.toString(), false, 1, 0, 2000);
this.waitForBindings( backupServers.get(0).getServer(), ADDRESS.toString(), false, 1, 0, 2000);
this.waitForBindings( backupServers.get(1).getServer(), ADDRESS.toString(), false, 1, 0, 2000);
this.waitForBindings( backupServers.get(1).getServer(), ADDRESS.toString(), false, 1, 0, 2000);
ServerLocator locator0 = getBackupServerLocator(0);
ServerLocator locator1 = getBackupServerLocator(1);
@ -120,8 +149,8 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated
producer.close();
waitForDistribution(ADDRESS, backupServers.get(0).getServer(), 100);
waitForDistribution(ADDRESS, backupServers.get(1).getServer(), 100);
assertTrue(waitForDistribution(ADDRESS, backupServers.get(0).getServer(), 100));
assertTrue(waitForDistribution(ADDRESS, backupServers.get(1).getServer(), 100));
List<TestableServer> toCrash = new ArrayList<>();
for (TestableServer backupServer : backupServers) {

View File

@ -16,6 +16,9 @@
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover;
import java.util.Arrays;
import java.util.Collection;
import org.apache.activemq.artemis.api.core.QueueConfiguration;
import org.apache.activemq.artemis.api.core.client.ClientConsumer;
import org.apache.activemq.artemis.api.core.client.ClientMessage;
@ -25,9 +28,20 @@ import org.apache.activemq.artemis.api.core.client.ClientSessionFactory;
import org.apache.activemq.artemis.api.core.client.ServerLocator;
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@RunWith(Parameterized.class)
public class ReplicatedMultipleServerFailoverTest extends MultipleServerFailoverTestBase {
@Parameterized.Parameter
public HAType haType;
@Parameterized.Parameters(name = "ha={0}")
public static Collection<Object[]> getParams() {
return Arrays.asList(new Object[][]{{HAType.SharedNothingReplication}, {HAType.PluggableQuorumReplication}});
}
@Test
public void testStartLiveFirst() throws Exception {
for (TestableServer liveServer : liveServers) {
@ -140,8 +154,8 @@ public class ReplicatedMultipleServerFailoverTest extends MultipleServerFailover
}
@Override
public boolean isSharedStore() {
return false;
public HAType haType() {
return haType;
}
@Override

View File

@ -19,7 +19,7 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover;
public class SharedStoreDistributionTest extends ReplicatedDistributionTest {
@Override
protected boolean isSharedStore() {
return true;
protected HAType haType() {
return HAType.SharedStore;
}
}

View File

@ -41,8 +41,8 @@ public class SharedStoreDontWaitForActivationTest extends ClusterTestBase {
// 1. configure 0 as backup of one to share the same node manager and file
// storage locations
setupBackupServer(0, 1, isFileStorage(), true, isNetty());
setupLiveServer(1, isFileStorage(), true, isNetty(), false);
setupBackupServer(0, 1, isFileStorage(), HAType.SharedStore, isNetty());
setupLiveServer(1, isFileStorage(), HAType.SharedStore, isNetty(), false);
// now reconfigure the HA policy for both servers to master with automatic
// failover and wait-for-activation disabled.

View File

@ -40,8 +40,8 @@ public class SharedStoreMetricsLeakTest extends ClusterTestBase {
}
private void setupServers() throws Exception {
setupLiveServer(0, isFileStorage(), true, isNetty(), false);
setupBackupServer(1, 0, isFileStorage(), true, isNetty());
setupLiveServer(0, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupBackupServer(1, 0, isFileStorage(), HAType.SharedStore, isNetty());
getServer(0).getConfiguration().setHAPolicyConfiguration(new SharedStoreMasterPolicyConfiguration().setFailoverOnServerShutdown(true));
getServer(0).getConfiguration().setMetricsConfiguration(new MetricsConfiguration().setJvmThread(false).setJvmGc(false).setJvmMemory(false).setPlugin(new SimpleMetricsPlugin().init(null)));

View File

@ -41,9 +41,9 @@ public class SharedStoreScaleDownBackupTest extends ClusterTestBase {
public void setUp() throws Exception {
super.setUp();
setupLiveServer(0, isFileStorage(), true, isNetty(), false);
setupLiveServer(1, isFileStorage(), true, isNetty(), false);
setupBackupServer(2, 0, isFileStorage(), true, isNetty());
setupLiveServer(0, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(1, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupBackupServer(2, 0, isFileStorage(), HAType.SharedStore, isNetty());
setupClusterConnection("cluster0", "testAddress", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 0, 1);
setupClusterConnection("cluster1", "testAddress", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 1, 0);

View File

@ -42,13 +42,13 @@ public class StaticClusterWithBackupFailoverTest extends ClusterWithBackupFailov
@Override
protected void setupServers() throws Exception {
// The backups
setupBackupServer(3, 0, isFileStorage(), isSharedStorage(), isNetty());
setupBackupServer(4, 1, isFileStorage(), isSharedStorage(), isNetty());
setupBackupServer(5, 2, isFileStorage(), isSharedStorage(), isNetty());
setupBackupServer(3, 0, isFileStorage(), haType(), isNetty());
setupBackupServer(4, 1, isFileStorage(), haType(), isNetty());
setupBackupServer(5, 2, isFileStorage(), haType(), isNetty());
// The lives
setupLiveServer(0, isFileStorage(), isSharedStorage(), isNetty(), false);
setupLiveServer(1, isFileStorage(), isSharedStorage(), isNetty(), false);
setupLiveServer(2, isFileStorage(), isSharedStorage(), isNetty(), false);
setupLiveServer(0, isFileStorage(), haType(), isNetty(), false);
setupLiveServer(1, isFileStorage(), haType(), isNetty(), false);
setupLiveServer(2, isFileStorage(), haType(), isNetty(), false);
}
}

Some files were not shown because too many files have changed in this diff Show More