ARTEMIS-2716 Pluggable Quorum Vote

Francesco Nigro 2020-04-14 17:11:32 +02:00 committed by Clebert Suconic
parent 9989d9c4fc
commit 536271485f
116 changed files with 8405 additions and 298 deletions

View File

@ -270,6 +270,18 @@ public class ThreadLeakCheckRule extends TestWatcher {
} else if (threadName.contains("ObjectCleanerThread")) {
// Required since upgrade to Netty 4.1.22 maybe because https://github.com/netty/netty/commit/739e70398ccb6b11ffa97c6b5f8d55e455a2165e
return true;
} else if (threadName.contains("RMI TCP")) {
return true;
} else if (threadName.contains("RMI Scheduler")) {
return true;
} else if (threadName.contains("RMI RenewClean")) {
return true;
} else if (threadName.contains("Signal Dispatcher")) {
return true;
} else if (threadName.contains("ForkJoinPool.commonPool")) {
return true;
} else if (threadName.contains("GC Daemon")) {
return true;
} else {
for (StackTraceElement element : thread.getStackTrace()) {
if (element.getClassName().contains("org.jboss.byteman.agent.TransformListener")) {

View File

@ -264,6 +264,9 @@ public final class ActiveMQDefaultConfiguration {
// the directory to store the journal files in
private static String DEFAULT_JOURNAL_DIR = "data/journal";
// the directory to store the data files in
private static String DEFAULT_DATA_DIR = "data";
// true means that the journal directory will be created
private static boolean DEFAULT_CREATE_JOURNAL_DIR = true;
@ -627,6 +630,8 @@ public final class ActiveMQDefaultConfiguration {
public static final String DEFAULT_TEMPORARY_QUEUE_NAMESPACE = "";
private static final String DEFAULT_DISTRIBUTED_PRIMITIVE_MANAGER_CLASS_NAME = "org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager";
// Number of concurrent workers for a core bridge
public static int DEFAULT_BRIDGE_CONCURRENCY = 1;
@ -938,6 +943,13 @@ public final class ActiveMQDefaultConfiguration {
return DEFAULT_JOURNAL_DIR;
}
/**
* the directory to store the data files in
*/
public static String getDefaultDataDir() {
return DEFAULT_DATA_DIR;
}
/**
* true means that the journal directory will be created
*/
@ -1721,6 +1733,10 @@ public final class ActiveMQDefaultConfiguration {
return DEFAULT_TEMPORARY_QUEUE_NAMESPACE;
}
public static String getDefaultDistributedPrimitiveManagerClassName() {
return DEFAULT_DISTRIBUTED_PRIMITIVE_MANAGER_CLASS_NAME;
}
public static int getDefaultBridgeConcurrency() {
return DEFAULT_BRIDGE_CONCURRENCY;
}

View File

@ -231,6 +231,17 @@
<version>${project.version}</version>
<classifier>javadoc</classifier>
</dependency>
<!-- quorum -->
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-quorum-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-quorum-ri</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>

View File

@ -62,6 +62,9 @@
<include>org.apache.activemq.rest:artemis-rest</include>
<include>org.apache.qpid:qpid-jms-client</include>
<include>io.micrometer:micrometer-core</include>
<!-- quorum -->
<include>org.apache.activemq:artemis-quorum-api</include>
<include>org.apache.activemq:artemis-quorum-ri</include>
<!-- dependencies -->
<include>jakarta.jms:jakarta.jms-api</include>
@ -97,6 +100,12 @@
<include>com.sun.xml.bind:jaxb-impl</include>
<include>jakarta.activation:jakarta.activation-api</include>
<include>jakarta.security.auth.message:jakarta.security.auth.message-api</include>
<!-- quorum -->
<include>org.apache.curator:curator-recipes</include>
<include>org.apache.curator:curator-client</include>
<include>org.apache.curator:curator-framework</include>
<include>org.apache.zookeeper:zookeeper</include>
<include>org.apache.zookeeper:zookeeper-jute</include>
</includes>
<!--excludes>
<exclude>org.apache.activemq:artemis-website</exclude>

View File

@ -81,6 +81,7 @@
<!--bundle dependency="true">mvn:io.micrometer/micrometer-core/${version.micrometer}</bundle-->
<bundle>mvn:org.apache.activemq/activemq-artemis-native/${activemq-artemis-native-version}</bundle>
<bundle>mvn:org.apache.activemq/artemis-quorum-api/${pom.version}</bundle>
<bundle>mvn:org.apache.activemq/artemis-server-osgi/${pom.version}</bundle>
</feature>

View File

@ -0,0 +1,41 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-pom</artifactId>
<version>2.18.0-SNAPSHOT</version>
</parent>
<artifactId>artemis-quorum-api</artifactId>
<packaging>bundle</packaging>
<name>ActiveMQ Artemis Quorum API</name>
<properties>
<activemq.basedir>${project.basedir}/..</activemq.basedir>
</properties>
<dependencies>
<dependency>
<groupId>com.google.errorprone</groupId>
<artifactId>error_prone_core</artifactId>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.LockSupport;
public interface DistributedLock extends AutoCloseable {
String getLockId();
boolean isHeldByCaller() throws UnavailableStateException;
boolean tryLock() throws UnavailableStateException, InterruptedException;
default boolean tryLock(long timeout, TimeUnit unit) throws UnavailableStateException, InterruptedException {
// polling period: there is no benefit in retrying more often than this
final long TARGET_FIRE_PERIOD_NS = TimeUnit.MILLISECONDS.toNanos(250);
if (timeout < 0) {
throw new IllegalArgumentException("timeout cannot be negative");
}
Objects.requireNonNull(unit);
if (timeout == 0) {
return tryLock();
}
final Thread currentThread = Thread.currentThread();
final long timeoutNs = unit.toNanos(timeout);
final long start = System.nanoTime();
final long deadline = start + timeoutNs;
long expectedNextFireTime = start;
while (!currentThread.isInterrupted()) {
long parkNs = expectedNextFireTime - System.nanoTime();
while (parkNs > 0) {
LockSupport.parkNanos(parkNs);
if (currentThread.isInterrupted()) {
throw new InterruptedException();
}
final long now = System.nanoTime();
parkNs = expectedNextFireTime - now;
}
if (tryLock()) {
return true;
}
final long now = System.nanoTime();
final long remainingTime = deadline - now;
if (remainingTime <= 0) {
return false;
}
if (remainingTime < TARGET_FIRE_PERIOD_NS) {
expectedNextFireTime = now;
} else {
expectedNextFireTime += TARGET_FIRE_PERIOD_NS;
}
}
throw new InterruptedException();
}
void unlock() throws UnavailableStateException;
void addListener(UnavailableLockListener listener);
void removeListener(UnavailableLockListener listener);
@FunctionalInterface
interface UnavailableLockListener {
void onUnavailableLockEvent();
}
@Override
void close();
}
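A minimal usage sketch of this interface (not part of the commit): it assumes an already started DistributedPrimitiveManager named manager and a hypothetical lock id "primary"; the timed tryLock above polls the plain tryLock roughly every 250 ms until the deadline.
// Illustrative sketch only; "manager" is an already started DistributedPrimitiveManager
// and checked exceptions are assumed to be handled by the caller.
DistributedLock lock = manager.getDistributedLock("primary");
lock.addListener(() -> System.err.println("lock " + lock.getLockId() + " is no longer available"));
if (lock.tryLock(10, TimeUnit.SECONDS)) {
   try {
      // the caller owns the lock here and isHeldByCaller() is expected to return true
   } finally {
      lock.unlock();
   }
}
lock.close();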

View File

@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
public interface DistributedPrimitiveManager extends AutoCloseable {
static DistributedPrimitiveManager newInstanceOf(String className, Map<String, String> properties) throws Exception {
return (DistributedPrimitiveManager) Class.forName(className).getDeclaredConstructor(Map.class).newInstance(properties);
}
@FunctionalInterface
interface UnavailableManagerListener {
void onUnavailableManagerEvent();
}
void addUnavailableManagerListener(UnavailableManagerListener listener);
void removeUnavailableManagerListener(UnavailableManagerListener listener);
boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException;
void start() throws InterruptedException, ExecutionException;
boolean isStarted();
void stop();
DistributedLock getDistributedLock(String lockId) throws InterruptedException, ExecutionException, TimeoutException;
MutableLong getMutableLong(String mutableLongId) throws InterruptedException, ExecutionException, TimeoutException;
@Override
default void close() {
stop();
}
}
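A sketch of the intended lifecycle (not part of the commit): the class name below is the file-based test implementation added later in this commit and "locks-folder" is its only property, but any implementation exposing a Map&lt;String, String&gt; constructor can be plugged in the same way through newInstanceOf.
// Illustrative sketch; assumes the locks folder already exists and that checked
// exceptions are handled by the caller.
Map<String, String> properties = Collections.singletonMap("locks-folder", "/tmp/locks");
try (DistributedPrimitiveManager manager = DistributedPrimitiveManager.newInstanceOf(
      "org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager", properties)) {
   if (!manager.start(5, TimeUnit.SECONDS)) {
      throw new IllegalStateException("cannot reach the coordination service");
   }
   DistributedLock lock = manager.getDistributedLock("primary");
   // ... use the primitives while the manager stays started ...
}  // close() delegates to stop(), invalidating every primitive obtained from this manager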

View File

@ -0,0 +1,51 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.activemq.artemis.quorum;
public interface MutableLong extends AutoCloseable {
String getMutableLongId();
long get() throws UnavailableStateException;
void set(long value) throws UnavailableStateException;
/**
* This is not meant to be atomic; it's semantically equivalent to:
* <pre>
* long oldValue = mutableLong.get();
* if (oldValue != expectedValue) {
* return false;
* }
* mutableLong.set(newValue);
* return true;
* </pre>
*/
default boolean compareAndSet(long expectedValue, long newValue) throws UnavailableStateException {
final long oldValue = get();
if (oldValue != expectedValue) {
return false;
}
set(newValue);
return true;
}
@Override
void close();
}
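Because compareAndSet is explicitly not atomic, concurrent writers must be serialized by other means; a sketch (not part of the commit) of one possible pattern, using a DistributedLock with a hypothetical shared id to guard the check-then-set. The "manager" here is assumed to be an already started implementation that supports mutable longs.
// Illustrative sketch; checked exceptions are handled by the caller.
DistributedLock guard = manager.getDistributedLock("sequence");
MutableLong sequence = manager.getMutableLong("sequence");
if (guard.tryLock()) {
   try {
      final long current = sequence.get();
      // safe only because no other writer can hold "sequence" while we do
      sequence.compareAndSet(current, current + 1);
   } finally {
      guard.unlock();
   }
}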

View File

@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum;
public final class UnavailableStateException extends Exception {
public UnavailableStateException() {
super();
}
public UnavailableStateException(String message) {
super(message);
}
public UnavailableStateException(String message, Throwable cause) {
super(message, cause);
}
public UnavailableStateException(Throwable cause) {
super(cause);
}
}

artemis-quorum-ri/pom.xml Normal file
View File

@ -0,0 +1,124 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-pom</artifactId>
<version>2.18.0-SNAPSHOT</version>
</parent>
<artifactId>artemis-quorum-ri</artifactId>
<packaging>jar</packaging>
<name>ActiveMQ Artemis Quorum RI</name>
<properties>
<activemq.basedir>${project.basedir}/..</activemq.basedir>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-client</artifactId>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-test</artifactId>
<version>${curator.version}</version>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-quorum-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.jboss.logging</groupId>
<artifactId>jboss-logging</artifactId>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-commons</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.google.errorprone</groupId>
<artifactId>error_prone_core</artifactId>
</dependency>
<!-- tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest</artifactId>
<version>${hamcrest.version}</version>
<scope>test</scope>
</dependency>
<!-- test logging -->
<dependency>
<groupId>org.jboss.logging</groupId>
<artifactId>jboss-logging-processor</artifactId>
<scope>provided</scope>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.jboss.logmanager</groupId>
<artifactId>jboss-logmanager</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.wildfly.common</groupId>
<artifactId>wildfly-common</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-commons</artifactId>
<version>${project.version}</version>
<scope>test</scope>
<type>test-jar</type>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<phase>test</phase>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.file;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.MutableLong;
/**
* This implementation is only suitable for unit tests and it won't attempt
* to manage nor purge existing stale lock files. Properly setting up and
* tearing down the environment is part of the test life-cycle.
*/
public class FileBasedPrimitiveManager implements DistributedPrimitiveManager {
private final File locksFolder;
private final Map<String, FileDistributedLock> locks;
private boolean started;
public FileBasedPrimitiveManager(Map<String, String> args) {
this(new File(args.get("locks-folder")));
}
public FileBasedPrimitiveManager(File locksFolder) {
Objects.requireNonNull(locksFolder);
if (!locksFolder.exists()) {
throw new IllegalStateException(locksFolder + " is supposed to already exist");
}
if (!locksFolder.isDirectory()) {
throw new IllegalStateException(locksFolder + " is supposed to be a directory");
}
this.locksFolder = locksFolder;
this.locks = new HashMap<>();
}
@Override
public boolean isStarted() {
return started;
}
@Override
public void addUnavailableManagerListener(UnavailableManagerListener listener) {
// noop
}
@Override
public void removeUnavailableManagerListener(UnavailableManagerListener listener) {
// noop
}
@Override
public boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException {
if (timeout >= 0) {
Objects.requireNonNull(unit);
}
if (started) {
return true;
}
started = true;
return true;
}
@Override
public void start() throws InterruptedException, ExecutionException {
start(-1, null);
}
@Override
public void stop() {
if (!started) {
return;
}
try {
locks.forEach((lockId, lock) -> {
try {
lock.close(false);
} catch (Throwable t) {
// TODO: no-op for now; logging the failure would be better
}
});
locks.clear();
} finally {
started = false;
}
}
@Override
public DistributedLock getDistributedLock(String lockId) throws ExecutionException {
Objects.requireNonNull(lockId);
if (!started) {
throw new IllegalStateException("manager should be started first");
}
final FileDistributedLock lock = locks.get(lockId);
if (lock != null && !lock.isClosed()) {
return lock;
}
try {
final FileDistributedLock newLock = new FileDistributedLock(locks::remove, locksFolder, lockId);
locks.put(lockId, newLock);
return newLock;
} catch (IOException ioEx) {
throw new ExecutionException(ioEx);
}
}
@Override
public MutableLong getMutableLong(String mutableLongId) throws InterruptedException, ExecutionException, TimeoutException {
// TODO
return null;
}
}
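A sketch of how a test could drive this manager directly (not part of the commit); the folder name and lock id are placeholders, the folder must be created by the test itself, and getMutableLong is still a TODO here.
// Illustrative sketch; checked exceptions are handled by the test.
File locksFolder = Files.createTempDirectory("locks-folder").toFile();
FileBasedPrimitiveManager manager = new FileBasedPrimitiveManager(locksFolder);
manager.start();
DistributedLock lock = manager.getDistributedLock("primary");
if (lock.tryLock()) {
   lock.unlock();
}
manager.stop();   // closes (without callbacks) and forgets every lock created so far
// getMutableLong currently returns null: only locks are supported by this implementation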

View File

@ -0,0 +1,141 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.file;
import java.io.File;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.channels.FileLock;
import java.nio.channels.OverlappingFileLockException;
import java.nio.file.StandardOpenOption;
import java.util.function.Consumer;
import org.apache.activemq.artemis.quorum.DistributedLock;
final class FileDistributedLock implements DistributedLock {
private final String lockId;
private final Consumer<String> onClosedLock;
private boolean closed;
private FileLock fileLock;
private final FileChannel channel;
FileDistributedLock(Consumer<String> onClosedLock, File locksFolder, String lockId) throws IOException {
this.onClosedLock = onClosedLock;
this.lockId = lockId;
this.closed = false;
this.fileLock = null;
this.channel = FileChannel.open(new File(locksFolder, lockId).toPath(), StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE);
}
private void checkNotClosed() {
if (closed) {
throw new IllegalStateException("This lock is closed");
}
}
@Override
public String getLockId() {
checkNotClosed();
return lockId;
}
@Override
public boolean isHeldByCaller() {
checkNotClosed();
final FileLock fileLock = this.fileLock;
if (fileLock == null) {
return false;
}
return fileLock.isValid();
}
@Override
public boolean tryLock() {
checkNotClosed();
final FileLock fileLock = this.fileLock;
if (fileLock != null) {
throw new IllegalStateException("unlock first");
}
final FileLock lock;
try {
lock = channel.tryLock();
} catch (OverlappingFileLockException o) {
// this process already holds this lock, but through a different manager
return false;
} catch (Throwable t) {
throw new IllegalStateException(t);
}
if (lock == null) {
return false;
}
this.fileLock = lock;
return true;
}
@Override
public void unlock() {
checkNotClosed();
final FileLock fileLock = this.fileLock;
if (fileLock != null) {
this.fileLock = null;
try {
fileLock.close();
} catch (IOException e) {
// noop
}
}
}
@Override
public void addListener(UnavailableLockListener listener) {
checkNotClosed();
// noop
}
@Override
public void removeListener(UnavailableLockListener listener) {
checkNotClosed();
// noop
}
public boolean isClosed() {
return closed;
}
public void close(boolean useCallback) {
if (closed) {
return;
}
try {
if (useCallback) {
onClosedLock.accept(lockId);
}
unlock();
channel.close();
} catch (IOException e) {
// ignore it
} finally {
closed = true;
}
}
@Override
public void close() {
close(true);
}
}
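The OverlappingFileLockException branch above means a second lock instance in the same JVM simply observes the lock as busy rather than failing; a sketch (not part of the commit) of that behavior with two file-based managers sharing the same folder, where "locksFolder" is an assumed existing directory.
// Illustrative sketch; checked exceptions are handled by the caller.
FileBasedPrimitiveManager owner = new FileBasedPrimitiveManager(locksFolder);
FileBasedPrimitiveManager observer = new FileBasedPrimitiveManager(locksFolder);
owner.start();
observer.start();
DistributedLock held = owner.getDistributedLock("primary");
held.tryLock();                            // acquires the OS-level file lock
DistributedLock contended = observer.getDistributedLock("primary");
// channel.tryLock() throws OverlappingFileLockException because this JVM already
// holds an overlapping lock; tryLock() maps that into a plain "false"
boolean acquired = contended.tryLock();    // false while "held" keeps the lock
held.unlock();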

View File

@ -0,0 +1,171 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.zookeeper;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.UUID;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.TimeUnit;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager.PrimitiveId;
import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreV2;
import org.apache.curator.framework.recipes.locks.Lease;
final class CuratorDistributedLock extends CuratorDistributedPrimitive implements DistributedLock {
private final InterProcessSemaphoreV2 ipcSem;
private final CopyOnWriteArrayList<UnavailableLockListener> listeners;
private Lease lease;
private byte[] leaseVersion;
CuratorDistributedLock(PrimitiveId id, CuratorDistributedPrimitiveManager manager, InterProcessSemaphoreV2 ipcSem) {
super(id, manager);
this.ipcSem = ipcSem;
this.listeners = new CopyOnWriteArrayList<>();
this.leaseVersion = null;
}
@Override
protected void handleReconnected() {
super.handleReconnected();
if (leaseVersion != null) {
assert lease != null;
try {
if (Arrays.equals(lease.getData(), leaseVersion)) {
return;
}
onLost();
} catch (Exception e) {
onLost();
}
}
}
@Override
protected void handleLost() {
super.handleLost();
lease = null;
leaseVersion = null;
for (UnavailableLockListener listener : listeners) {
listener.onUnavailableLockEvent();
}
}
@Override
public String getLockId() {
return getId().id;
}
@Override
public boolean isHeldByCaller() throws UnavailableStateException {
return run(() -> {
checkUnavailable();
if (lease == null) {
return false;
}
assert leaseVersion != null;
try {
return Arrays.equals(lease.getData(), leaseVersion);
} catch (Throwable t) {
throw new UnavailableStateException(t);
}
});
}
@Override
public boolean tryLock() throws UnavailableStateException, InterruptedException {
return tryRun(() -> {
if (lease != null) {
throw new IllegalStateException("unlock first");
}
checkUnavailable();
try {
final byte[] leaseVersion = UUID.randomUUID().toString().getBytes(StandardCharsets.UTF_8);
ipcSem.setNodeData(leaseVersion);
lease = ipcSem.acquire(0, TimeUnit.NANOSECONDS);
if (lease == null) {
ipcSem.setNodeData(null);
return false;
}
this.leaseVersion = leaseVersion;
assert Arrays.equals(lease.getData(), leaseVersion);
return true;
} catch (InterruptedException ie) {
throw ie;
} catch (Throwable e) {
throw new UnavailableStateException(e);
}
});
}
@Override
public void unlock() throws UnavailableStateException {
run(() -> {
checkUnavailable();
final Lease lease = this.lease;
if (lease != null) {
this.lease = null;
this.leaseVersion = null;
try {
ipcSem.returnLease(lease);
} catch (Throwable e) {
throw new UnavailableStateException(e);
}
}
return null;
});
}
@Override
public void addListener(UnavailableLockListener listener) {
run(() -> {
listeners.add(listener);
fireUnavailableListener(listener::onUnavailableLockEvent);
return null;
});
}
@Override
public void removeListener(UnavailableLockListener listener) {
run(() -> {
listeners.remove(listener);
return null;
});
}
@Override
protected void handleClosed() {
super.handleClosed();
listeners.clear();
final Lease lease = this.lease;
if (lease == null) {
return;
}
this.lease = null;
if (isUnavailable()) {
return;
}
try {
ipcSem.returnLease(lease);
} catch (Throwable t) {
// TODO: silently ignored for now; a debug log would be better
}
}
}

View File

@ -0,0 +1,172 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.zookeeper;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager.PrimitiveId;
import static org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager.PrimitiveType.validatePrimitiveInstance;
public abstract class CuratorDistributedPrimitive implements AutoCloseable {
// this is used to prevent deadlocks on close
private final CuratorDistributedPrimitiveManager manager;
private final PrimitiveId id;
private boolean unavailable;
private boolean closed;
protected CuratorDistributedPrimitive(PrimitiveId id, CuratorDistributedPrimitiveManager manager) {
this.id = id;
this.manager = manager;
this.closed = false;
this.unavailable = false;
validatePrimitiveInstance(this);
}
final PrimitiveId getId() {
return id;
}
final void onReconnected() {
synchronized (manager) {
if (closed || unavailable) {
return;
}
handleReconnected();
}
}
protected void handleReconnected() {
}
final void onLost() {
synchronized (manager) {
if (closed || unavailable) {
return;
}
unavailable = true;
handleLost();
}
}
protected void handleLost() {
}
final void onSuspended() {
synchronized (manager) {
if (closed || unavailable) {
return;
}
handleSuspended();
}
}
protected void handleSuspended() {
}
final void onRemoved() {
close(false);
}
private void checkNotClosed() {
if (closed) {
throw new IllegalStateException("This lock is closed");
}
}
@FunctionalInterface
protected interface PrimitiveAction<R, T extends Throwable> {
R call() throws T;
}
@FunctionalInterface
protected interface InterruptablePrimitiveAction<R, T extends Throwable> {
R call() throws InterruptedException, T;
}
protected final void checkUnavailable() throws UnavailableStateException {
if (unavailable) {
throw new UnavailableStateException(id.type + " with id = " + id.id + " isn't available");
}
}
protected final void fireUnavailableListener(Runnable task) {
run(() -> {
if (!unavailable) {
return false;
}
manager.startHandlingEvents();
try {
task.run();
} finally {
manager.completeHandlingEvents();
}
return true;
});
}
protected final <R, T extends Throwable> R run(PrimitiveAction<R, T> action) throws T {
synchronized (manager) {
manager.checkHandlingEvents();
checkNotClosed();
return action.call();
}
}
protected final <R, T extends Throwable> R tryRun(InterruptablePrimitiveAction<R, T> action) throws InterruptedException, T {
synchronized (manager) {
manager.checkHandlingEvents();
checkNotClosed();
return action.call();
}
}
private void close(boolean remove) {
synchronized (manager) {
manager.checkHandlingEvents();
if (closed) {
return;
}
closed = true;
if (remove) {
manager.remove(this);
}
handleClosed();
}
}
protected void handleClosed() {
}
protected final boolean isUnavailable() {
synchronized (manager) {
return unavailable;
}
}
@Override
public final void close() {
close(true);
}
}

View File

@ -0,0 +1,367 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.zookeeper;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.MutableLong;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.recipes.atomic.DistributedAtomicLong;
import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreV2;
import org.apache.curator.framework.state.ConnectionState;
import org.apache.curator.framework.state.ConnectionStateListener;
import org.apache.curator.retry.RetryForever;
import org.apache.curator.retry.RetryNTimes;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.joining;
public class CuratorDistributedPrimitiveManager implements DistributedPrimitiveManager, ConnectionStateListener {
enum PrimitiveType {
lock, mutableLong;
static <T extends CuratorDistributedPrimitive> T validatePrimitiveInstance(T primitive) {
if (primitive == null) {
return null;
}
boolean valid = false;
switch (primitive.getId().type) {
case lock:
valid = primitive instanceof CuratorDistributedLock;
break;
case mutableLong:
valid = primitive instanceof CuratorMutableLong;
break;
}
if (!valid) {
throw new AssertionError("Implementation error: " + primitive.getClass() + " is wrongly considered " + primitive.getId().type);
}
return primitive;
}
}
static final class PrimitiveId {
final String id;
final PrimitiveType type;
private PrimitiveId(String id, PrimitiveType type) {
this.id = requireNonNull(id);
this.type = requireNonNull(type);
}
static PrimitiveId of(String id, PrimitiveType type) {
return new PrimitiveId(id, type);
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
PrimitiveId that = (PrimitiveId) o;
if (!Objects.equals(id, that.id))
return false;
return type == that.type;
}
@Override
public int hashCode() {
int result = id != null ? id.hashCode() : 0;
result = 31 * result + (type != null ? type.hashCode() : 0);
return result;
}
}
private static final String CONNECT_STRING_PARAM = "connect-string";
private static final String NAMESPACE_PARAM = "namespace";
private static final String SESSION_MS_PARAM = "session-ms";
private static final String SESSION_PERCENT_PARAM = "session-percent";
private static final String CONNECTION_MS_PARAM = "connection-ms";
private static final String RETRIES_PARAM = "retries";
private static final String RETRIES_MS_PARAM = "retries-ms";
private static final Set<String> VALID_PARAMS = Stream.of(
CONNECT_STRING_PARAM,
NAMESPACE_PARAM,
SESSION_MS_PARAM,
SESSION_PERCENT_PARAM,
CONNECTION_MS_PARAM,
RETRIES_PARAM,
RETRIES_MS_PARAM).collect(Collectors.toSet());
private static final String VALID_PARAMS_ON_ERROR = VALID_PARAMS.stream().collect(joining(","));
// 9 times the default ZooKeeper tick time (2000 ms)
private static final String DEFAULT_SESSION_TIMEOUT_MS = Integer.toString(18_000);
private static final String DEFAULT_CONNECTION_TIMEOUT_MS = Integer.toString(8_000);
private static final String DEFAULT_RETRIES = Integer.toString(1);
private static final String DEFAULT_RETRIES_MS = Integer.toString(1000);
// why 1/3 of the session? https://cwiki.apache.org/confluence/display/CURATOR/TN14
private static final String DEFAULT_SESSION_PERCENT = Integer.toString(33);
private static Map<String, String> validateParameters(Map<String, String> config) {
config.forEach((parameterName, ignore) -> validateParameter(parameterName));
return config;
}
private static void validateParameter(String parameterName) {
if (!VALID_PARAMS.contains(parameterName)) {
throw new IllegalArgumentException("non existent parameter " + parameterName + ": accepted list is " + VALID_PARAMS_ON_ERROR);
}
}
private CuratorFramework client;
private final Map<PrimitiveId, CuratorDistributedPrimitive> primitives;
private CopyOnWriteArrayList<UnavailableManagerListener> listeners;
private boolean unavailable;
private boolean handlingEvents;
private final CuratorFrameworkFactory.Builder curatorBuilder;
public CuratorDistributedPrimitiveManager(Map<String, String> config) {
this(validateParameters(config), true);
}
private CuratorDistributedPrimitiveManager(Map<String, String> config, boolean ignore) {
this(config.get(CONNECT_STRING_PARAM),
config.get(NAMESPACE_PARAM),
Integer.parseInt(config.getOrDefault(SESSION_MS_PARAM, DEFAULT_SESSION_TIMEOUT_MS)),
Integer.parseInt(config.getOrDefault(SESSION_PERCENT_PARAM, DEFAULT_SESSION_PERCENT)),
Integer.parseInt(config.getOrDefault(CONNECTION_MS_PARAM, DEFAULT_CONNECTION_TIMEOUT_MS)),
Integer.parseInt(config.getOrDefault(RETRIES_PARAM, DEFAULT_RETRIES)),
Integer.parseInt(config.getOrDefault(RETRIES_MS_PARAM, DEFAULT_RETRIES_MS)));
}
private CuratorDistributedPrimitiveManager(String connectString,
String namespace,
int sessionMs,
int sessionPercent,
int connectionMs,
int retries,
int retriesMs) {
curatorBuilder = CuratorFrameworkFactory.builder()
.connectString(connectString)
.namespace(namespace)
.sessionTimeoutMs(sessionMs)
.connectionTimeoutMs(connectionMs)
.retryPolicy(retries >= 0 ? new RetryNTimes(retries, retriesMs) : new RetryForever(retriesMs))
.simulatedSessionExpirationPercent(sessionPercent);
this.primitives = new HashMap<>();
this.listeners = null;
this.unavailable = false;
this.handlingEvents = false;
}
@Override
public synchronized boolean isStarted() {
checkHandlingEvents();
return client != null;
}
@Override
public synchronized void addUnavailableManagerListener(UnavailableManagerListener listener) {
checkHandlingEvents();
if (listeners == null) {
return;
}
listeners.add(listener);
if (unavailable) {
startHandlingEvents();
try {
listener.onUnavailableManagerEvent();
} finally {
completeHandlingEvents();
}
}
}
@Override
public synchronized void removeUnavailableManagerListener(UnavailableManagerListener listener) {
checkHandlingEvents();
if (listeners == null) {
return;
}
listeners.remove(listener);
}
@Override
public synchronized boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException {
checkHandlingEvents();
if (timeout >= 0) {
if (timeout > Integer.MAX_VALUE) {
throw new IllegalArgumentException("curator manager won't support too long timeout ie >" + Integer.MAX_VALUE);
}
requireNonNull(unit);
}
if (client != null) {
return true;
}
final CuratorFramework client = curatorBuilder.build();
try {
client.start();
if (!client.blockUntilConnected((int) timeout, unit)) {
client.close();
return false;
}
this.client = client;
this.listeners = new CopyOnWriteArrayList<>();
client.getConnectionStateListenable().addListener(this);
return true;
} catch (InterruptedException e) {
client.close();
throw e;
}
}
@Override
public synchronized void start() throws InterruptedException, ExecutionException {
start(-1, null);
}
@Override
public synchronized void stop() {
checkHandlingEvents();
final CuratorFramework client = this.client;
if (client == null) {
return;
}
this.client = null;
unavailable = false;
listeners.clear();
this.listeners = null;
client.getConnectionStateListenable().removeListener(this);
primitives.forEach((id, primitive) -> {
try {
primitive.onRemoved();
} catch (Throwable t) {
// TODO log?
}
});
primitives.clear();
client.close();
}
private synchronized <T extends CuratorDistributedPrimitive> T getPrimitive(PrimitiveId id,
Function<PrimitiveId, ? extends T> primitiveFactory) {
checkHandlingEvents();
requireNonNull(id);
if (client == null) {
throw new IllegalStateException("manager isn't started yet!");
}
final CuratorDistributedPrimitive primitive = PrimitiveType.validatePrimitiveInstance(primitives.get(id));
if (primitive != null) {
return (T) primitive;
}
final T newPrimitive = PrimitiveType.validatePrimitiveInstance(primitiveFactory.apply(id));
primitives.put(id, newPrimitive);
if (unavailable) {
startHandlingEvents();
try {
newPrimitive.onLost();
} finally {
completeHandlingEvents();
}
}
return newPrimitive;
}
@Override
public DistributedLock getDistributedLock(String lockId) {
return getPrimitive(PrimitiveId.of(lockId, PrimitiveType.lock),
id -> new CuratorDistributedLock(id, this,
new InterProcessSemaphoreV2(client, "/" + id.id + "/locks", 1)));
}
@Override
public MutableLong getMutableLong(String mutableLongId) {
return getPrimitive(PrimitiveId.of(mutableLongId, PrimitiveType.mutableLong),
id -> new CuratorMutableLong(id, this,
new DistributedAtomicLong(client, "/" + mutableLongId + "/activation-sequence", new RetryNTimes(0, 0))));
}
protected void startHandlingEvents() {
handlingEvents = true;
}
protected void completeHandlingEvents() {
handlingEvents = false;
}
protected void checkHandlingEvents() {
if (client == null) {
return;
}
if (handlingEvents) {
throw new IllegalStateException("UnavailableManagerListener isn't supposed to modify the manager or its primitives on event handling!");
}
}
@Override
public synchronized void stateChanged(CuratorFramework client, ConnectionState newState) {
if (this.client != client) {
return;
}
if (unavailable) {
return;
}
startHandlingEvents();
try {
switch (newState) {
case LOST:
unavailable = true;
listeners.forEach(listener -> listener.onUnavailableManagerEvent());
primitives.forEach((id, primitive) -> primitive.onLost());
break;
case RECONNECTED:
primitives.forEach((id, primitive) -> primitive.onReconnected());
break;
case SUSPENDED:
primitives.forEach((id, primitive) -> primitive.onSuspended());
break;
}
} finally {
completeHandlingEvents();
}
}
/**
* Used for testing purposes
*/
public synchronized CuratorFramework getCurator() {
checkHandlingEvents();
return client;
}
public synchronized void remove(CuratorDistributedPrimitive primitive) {
checkHandlingEvents();
primitives.remove(primitive.getId());
}
}
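A configuration sketch (not part of the commit) listing the accepted parameters: connect-string is the only one without a default, the other values shown simply restate the defaults above, and the host list is a placeholder.
// Illustrative sketch of a full configuration for the ZooKeeper-backed manager.
Map<String, String> config = new HashMap<>();
config.put("connect-string", "zk1:2181,zk2:2181,zk3:2181"); // required, placeholder hosts
config.put("namespace", "artemis");                         // optional Curator namespace
config.put("session-ms", "18000");                          // ZooKeeper session timeout
config.put("session-percent", "33");                        // simulated session expiration, see Curator TN14
config.put("connection-ms", "8000");                        // connection timeout
config.put("retries", "1");                                 // a negative value means retry forever
config.put("retries-ms", "1000");                           // pause between retries
DistributedPrimitiveManager manager =
   DistributedPrimitiveManager.newInstanceOf(
      "org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager", config);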

View File

@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.zookeeper;
import org.apache.activemq.artemis.quorum.MutableLong;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager.PrimitiveId;
import org.apache.curator.framework.recipes.atomic.AtomicValue;
import org.apache.curator.framework.recipes.atomic.DistributedAtomicLong;
final class CuratorMutableLong extends CuratorDistributedPrimitive implements MutableLong {
private final DistributedAtomicLong atomicLong;
CuratorMutableLong(PrimitiveId id, CuratorDistributedPrimitiveManager manager, DistributedAtomicLong atomicLong) {
super(id, manager);
this.atomicLong = atomicLong;
}
@Override
public String getMutableLongId() {
return getId().id;
}
@Override
public long get() throws UnavailableStateException {
return run(() -> {
checkUnavailable();
try {
AtomicValue<Long> atomicValue = atomicLong.get();
if (!atomicValue.succeeded()) {
throw new UnavailableStateException("cannot query long " + getId());
}
return atomicValue.postValue();
} catch (Throwable e) {
throw new UnavailableStateException(e);
}
});
}
@Override
public void set(long value) throws UnavailableStateException {
run(() -> {
checkUnavailable();
try {
atomicLong.forceSet(value);
return null;
} catch (Throwable e) {
throw new UnavailableStateException(e);
}
});
}
}

View File

@ -0,0 +1,297 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.Consumer;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
public abstract class DistributedLockTest {
private final ArrayList<AutoCloseable> closeables = new ArrayList<>();
@Before
public void setupEnv() throws Throwable {
}
protected abstract void configureManager(Map<String, String> config);
protected abstract String managerClassName();
@After
public void tearDownEnv() throws Throwable {
closeables.forEach(closeable -> {
try {
closeable.close();
} catch (Throwable t) {
// silent here
}
});
}
protected DistributedPrimitiveManager createManagedDistributeManager() {
return createManagedDistributeManager(stringStringMap -> {
});
}
protected DistributedPrimitiveManager createManagedDistributeManager(Consumer<? super Map<String, String>> defaultConfiguration) {
try {
final HashMap<String, String> config = new HashMap<>();
configureManager(config);
defaultConfiguration.accept(config);
final DistributedPrimitiveManager manager = DistributedPrimitiveManager.newInstanceOf(managerClassName(), config);
closeables.add(manager);
return manager;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
@Test
public void managerReturnsSameLockIfNotClosed() throws ExecutionException, InterruptedException, TimeoutException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
Assert.assertSame(manager.getDistributedLock("a"), manager.getDistributedLock("a"));
}
@Test
public void managerReturnsDifferentLocksIfClosed() throws ExecutionException, InterruptedException, TimeoutException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock closedLock = manager.getDistributedLock("a");
closedLock.close();
Assert.assertNotSame(closedLock, manager.getDistributedLock("a"));
}
@Test
public void managerReturnsDifferentLocksOnRestart() throws ExecutionException, InterruptedException, TimeoutException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock closedLock = manager.getDistributedLock("a");
manager.stop();
manager.start();
Assert.assertNotSame(closedLock, manager.getDistributedLock("a"));
}
@Test(expected = IllegalStateException.class)
public void managerCannotGetLockIfNotStarted() throws ExecutionException, InterruptedException, TimeoutException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.getDistributedLock("a");
}
@Test(expected = NullPointerException.class)
public void managerCannotGetLockWithNullLockId() throws ExecutionException, InterruptedException, TimeoutException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
manager.getDistributedLock(null);
}
@Test
public void closingLockUnlockIt() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock closedLock = manager.getDistributedLock("a");
Assert.assertTrue(closedLock.tryLock());
closedLock.close();
Assert.assertTrue(manager.getDistributedLock("a").tryLock());
}
@Test
public void managerStopUnlockLocks() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
Assert.assertTrue(manager.getDistributedLock("a").tryLock());
Assert.assertTrue(manager.getDistributedLock("b").tryLock());
manager.stop();
manager.start();
Assert.assertFalse(manager.getDistributedLock("a").isHeldByCaller());
Assert.assertFalse(manager.getDistributedLock("b").isHeldByCaller());
}
@Test
public void acquireAndReleaseLock() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock lock = manager.getDistributedLock("a");
Assert.assertFalse(lock.isHeldByCaller());
Assert.assertTrue(lock.tryLock());
Assert.assertTrue(lock.isHeldByCaller());
lock.unlock();
Assert.assertFalse(lock.isHeldByCaller());
}
@Test(expected = IllegalStateException.class)
public void cannotAcquireSameLockTwice() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock lock = manager.getDistributedLock("a");
Assert.assertTrue(lock.tryLock());
lock.tryLock();
}
@Test
public void heldLockIsVisibleByDifferentManagers() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager ownerManager = createManagedDistributeManager();
DistributedPrimitiveManager observerManager = createManagedDistributeManager();
ownerManager.start();
observerManager.start();
Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock());
Assert.assertTrue(ownerManager.getDistributedLock("a").isHeldByCaller());
Assert.assertFalse(observerManager.getDistributedLock("a").isHeldByCaller());
}
@Test
public void unlockedLockIsVisibleByDifferentManagers() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager ownerManager = createManagedDistributeManager();
DistributedPrimitiveManager observerManager = createManagedDistributeManager();
ownerManager.start();
observerManager.start();
Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock());
ownerManager.getDistributedLock("a").unlock();
Assert.assertFalse(observerManager.getDistributedLock("a").isHeldByCaller());
Assert.assertFalse(ownerManager.getDistributedLock("a").isHeldByCaller());
}
@Test
public void cannotAcquireSameLockFromDifferentManagers() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager ownerManager = createManagedDistributeManager();
DistributedPrimitiveManager notOwnerManager = createManagedDistributeManager();
ownerManager.start();
notOwnerManager.start();
Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock());
Assert.assertFalse(notOwnerManager.getDistributedLock("a").tryLock());
}
@Test
public void cannotUnlockFromNotOwnerManager() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager ownerManager = createManagedDistributeManager();
DistributedPrimitiveManager notOwnerManager = createManagedDistributeManager();
ownerManager.start();
notOwnerManager.start();
Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock());
notOwnerManager.getDistributedLock("a").unlock();
Assert.assertFalse(notOwnerManager.getDistributedLock("a").isHeldByCaller());
Assert.assertTrue(ownerManager.getDistributedLock("a").isHeldByCaller());
}
@Test
public void timedTryLockSucceedWithShortTimeout() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock backgroundLock = manager.getDistributedLock("a");
Assert.assertTrue(backgroundLock.tryLock(1, TimeUnit.NANOSECONDS));
}
@Test
public void timedTryLockFailAfterTimeout() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedPrimitiveManager otherManager = createManagedDistributeManager();
otherManager.start();
Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
final long start = System.nanoTime();
final long timeoutSec = 1;
Assert.assertFalse(manager.getDistributedLock("a").tryLock(timeoutSec, TimeUnit.SECONDS));
final long elapsed = TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - start);
assertThat(elapsed, greaterThanOrEqualTo(timeoutSec));
}
@Test
public void timedTryLockSuccess() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedPrimitiveManager otherManager = createManagedDistributeManager();
otherManager.start();
Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
DistributedLock backgroundLock = manager.getDistributedLock("a");
CompletableFuture<Boolean> acquired = new CompletableFuture<>();
CountDownLatch startedTry = new CountDownLatch(1);
Thread tryLockThread = new Thread(() -> {
startedTry.countDown();
try {
if (!backgroundLock.tryLock(Long.MAX_VALUE, TimeUnit.DAYS)) {
acquired.complete(false);
} else {
acquired.complete(true);
}
} catch (Throwable e) {
acquired.complete(false);
}
});
tryLockThread.start();
Assert.assertTrue(startedTry.await(10, TimeUnit.SECONDS));
otherManager.getDistributedLock("a").unlock();
Assert.assertTrue(acquired.get(4, TimeUnit.SECONDS));
}
@Test
public void interruptStopTimedTryLock() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedPrimitiveManager otherManager = createManagedDistributeManager();
otherManager.start();
Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
DistributedLock backgroundLock = manager.getDistributedLock("a");
CompletableFuture<Boolean> interrupted = new CompletableFuture<>();
CountDownLatch startedTry = new CountDownLatch(1);
Thread tryLockThread = new Thread(() -> {
startedTry.countDown();
try {
backgroundLock.tryLock(Long.MAX_VALUE, TimeUnit.DAYS);
interrupted.complete(false);
} catch (UnavailableStateException e) {
interrupted.complete(false);
} catch (InterruptedException e) {
interrupted.complete(true);
}
});
tryLockThread.start();
Assert.assertTrue(startedTry.await(10, TimeUnit.SECONDS));
// let the background tryLock perform a few attempts
TimeUnit.SECONDS.sleep(1);
tryLockThread.interrupt();
Assert.assertTrue(interrupted.get(4, TimeUnit.SECONDS));
}
@Test
public void lockAndMutableLongWithSameIdCanExistsTogether() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
final String id = "a";
Assert.assertTrue(manager.getDistributedLock(id).tryLock());
Assert.assertEquals(0, manager.getMutableLong(id).get());
manager.getMutableLong(id).set(1);
Assert.assertTrue(manager.getDistributedLock(id).isHeldByCaller());
Assert.assertEquals(1, manager.getMutableLong(id).get());
}
}

View File

@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.file;
import java.io.File;
import java.lang.reflect.InvocationTargetException;
import java.util.Collections;
import java.util.Map;
import org.apache.activemq.artemis.quorum.DistributedLockTest;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
public class FileDistributedLockTest extends DistributedLockTest {
@Rule
public TemporaryFolder tmpFolder = new TemporaryFolder();
private File locksFolder;
@Before
@Override
public void setupEnv() throws Throwable {
locksFolder = tmpFolder.newFolder("locks-folder");
super.setupEnv();
}
@Override
protected void configureManager(Map<String, String> config) {
config.put("locks-folder", locksFolder.toString());
}
@Override
protected String managerClassName() {
return FileBasedPrimitiveManager.class.getName();
}
@Test
public void reflectiveManagerCreation() throws Exception {
DistributedPrimitiveManager.newInstanceOf(managerClassName(), Collections.singletonMap("locks-folder", locksFolder.toString()));
}
@Test(expected = InvocationTargetException.class)
public void reflectiveManagerCreationFailWithoutLocksFolder() throws Exception {
DistributedPrimitiveManager.newInstanceOf(managerClassName(), Collections.emptyMap());
}
@Test(expected = InvocationTargetException.class)
public void reflectiveManagerCreationFailIfLocksFolderIsNotFolder() throws Exception {
DistributedPrimitiveManager.newInstanceOf(managerClassName(), Collections.singletonMap("locks-folder", tmpFolder.newFile().toString()));
}
}
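For orientation only (not part of this commit), a minimal sketch of how the reflective creation exercised above could be used outside the test harness. It assumes the file-based manager only needs an existing "locks-folder" directory, as the tests suggest; the lock id and temporary-directory handling are illustrative.
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
public class FileLockSketch {
   public static void main(String[] args) throws Exception {
      // the file-based manager expects "locks-folder" to point at an existing directory
      Path locksFolder = Files.createTempDirectory("locks-folder");
      try (DistributedPrimitiveManager manager = DistributedPrimitiveManager.newInstanceOf(
            "org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager",
            Collections.singletonMap("locks-folder", locksFolder.toString()))) {
         manager.start();
         DistributedLock lock = manager.getDistributedLock("a");
         if (lock.tryLock()) {
            try {
               // critical section: the lock is held by this manager instance
            } finally {
               lock.unlock();
            }
         }
      }
   }
}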

View File

@ -0,0 +1,364 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.zookeeper;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import com.google.common.base.Predicates;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.apache.activemq.artemis.utils.Wait;
import org.apache.curator.test.InstanceSpec;
import org.apache.curator.test.TestingCluster;
import org.apache.activemq.artemis.quorum.DistributedLockTest;
import org.apache.curator.test.TestingZooKeeperServer;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import static java.lang.Boolean.TRUE;
import static org.hamcrest.Matchers.greaterThan;
@RunWith(value = Parameterized.class)
public class CuratorDistributedLockTest extends DistributedLockTest {
private static final int BASE_SERVER_PORT = 6666;
private static final int CONNECTION_MS = 2000;
// Beware: the server tick must be small enough to let the session expire correctly
private static final int SESSION_MS = 6000;
private static final int SERVER_TICK_MS = 2000;
private static final int RETRIES_MS = 100;
private static final int RETRIES = 1;
@Parameterized.Parameter
public int nodes;
@Rule
public TemporaryFolder tmpFolder = new TemporaryFolder();
private TestingCluster testingServer;
private InstanceSpec[] clusterSpecs;
private String connectString;
@Parameterized.Parameters(name = "nodes={0}")
public static Iterable<Object[]> getTestParameters() {
return Arrays.asList(new Object[][]{{3}, {5}});
}
@Override
public void setupEnv() throws Throwable {
clusterSpecs = new InstanceSpec[nodes];
for (int i = 0; i < nodes; i++) {
clusterSpecs[i] = new InstanceSpec(tmpFolder.newFolder(), BASE_SERVER_PORT + i, -1, -1, true, -1, SERVER_TICK_MS, -1);
}
testingServer = new TestingCluster(clusterSpecs);
testingServer.start();
// start() waits for quorumPeer != null, but not for the ensemble to have actually started...
Wait.waitFor(this::ensembleHasLeader);
connectString = testingServer.getConnectString();
super.setupEnv();
}
@Override
public void tearDownEnv() throws Throwable {
super.tearDownEnv();
testingServer.close();
}
@Override
protected void configureManager(Map<String, String> config) {
config.put("connect-string", connectString);
config.put("session-ms", Integer.toString(SESSION_MS));
config.put("connection-ms", Integer.toString(CONNECTION_MS));
config.put("retries", Integer.toString(RETRIES));
config.put("retries-ms", Integer.toString(RETRIES_MS));
}
@Override
protected String managerClassName() {
return CuratorDistributedPrimitiveManager.class.getName();
}
@Test(expected = RuntimeException.class)
public void cannotCreateManagerWithNotValidParameterNames() {
final DistributedPrimitiveManager manager = createManagedDistributeManager(config -> config.put("_", "_"));
}
@Test
public void canAcquireLocksFromDifferentNamespace() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
final DistributedPrimitiveManager manager1 = createManagedDistributeManager(config -> config.put("namespace", "1"));
manager1.start();
final DistributedPrimitiveManager manager2 = createManagedDistributeManager(config -> config.put("namespace", "2"));
manager2.start();
Assert.assertTrue(manager1.getDistributedLock("a").tryLock());
Assert.assertTrue(manager2.getDistributedLock("a").tryLock());
}
@Test
public void cannotStartManagerWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException {
final DistributedPrimitiveManager manager = createManagedDistributeManager();
testingServer.close();
Assert.assertFalse(manager.start(1, TimeUnit.SECONDS));
}
@Test(expected = UnavailableStateException.class)
public void cannotAcquireLockWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
final DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
final DistributedLock lock = manager.getDistributedLock("a");
final CountDownLatch notAvailable = new CountDownLatch(1);
final DistributedLock.UnavailableLockListener listener = notAvailable::countDown;
lock.addListener(listener);
testingServer.close();
Assert.assertTrue(notAvailable.await(30, TimeUnit.SECONDS));
lock.tryLock();
}
@Test(expected = UnavailableStateException.class)
public void cannotTryLockWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
final DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
final DistributedLock lock = manager.getDistributedLock("a");
testingServer.close();
lock.tryLock();
}
@Test(expected = UnavailableStateException.class)
public void cannotCheckLockStatusWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
final DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
final DistributedLock lock = manager.getDistributedLock("a");
Assert.assertFalse(lock.isHeldByCaller());
Assert.assertTrue(lock.tryLock());
testingServer.close();
lock.isHeldByCaller();
}
@Test(expected = UnavailableStateException.class)
public void looseLockAfterServerStop() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException, IOException {
final DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
final DistributedLock lock = manager.getDistributedLock("a");
Assert.assertTrue(lock.tryLock());
Assert.assertTrue(lock.isHeldByCaller());
final CountDownLatch notAvailable = new CountDownLatch(1);
final DistributedLock.UnavailableLockListener listener = notAvailable::countDown;
lock.addListener(listener);
Assert.assertEquals(1, notAvailable.getCount());
testingServer.close();
Assert.assertTrue(notAvailable.await(30, TimeUnit.SECONDS));
lock.isHeldByCaller();
}
@Test
public void canAcquireLockOnMajorityRestart() throws Exception {
final DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
final DistributedLock lock = manager.getDistributedLock("a");
Assert.assertTrue(lock.tryLock());
Assert.assertTrue(lock.isHeldByCaller());
final CountDownLatch notAvailable = new CountDownLatch(1);
final DistributedLock.UnavailableLockListener listener = notAvailable::countDown;
lock.addListener(listener);
Assert.assertEquals(1, notAvailable.getCount());
testingServer.stop();
notAvailable.await();
manager.stop();
restartMajorityNodes(true);
final DistributedPrimitiveManager otherManager = createManagedDistributeManager();
otherManager.start();
// await more than the expected value, which depends on how the Curator session expiration is configured
TimeUnit.MILLISECONDS.sleep(SESSION_MS + SERVER_TICK_MS);
Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
}
@Test
public void cannotStartManagerWithoutQuorum() throws Exception {
Assume.assumeThat(nodes, greaterThan(1));
DistributedPrimitiveManager manager = createManagedDistributeManager();
stopMajorityNotLeaderNodes(true);
Assert.assertFalse(manager.start(2, TimeUnit.SECONDS));
Assert.assertFalse(manager.isStarted());
}
@Test(expected = UnavailableStateException.class)
public void cannotAcquireLockWithoutQuorum() throws Exception {
Assume.assumeThat(nodes, greaterThan(1));
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
stopMajorityNotLeaderNodes(true);
DistributedLock lock = manager.getDistributedLock("a");
lock.tryLock();
}
@Test
public void cannotCheckLockWithoutQuorum() throws Exception {
Assume.assumeThat(nodes, greaterThan(1));
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
stopMajorityNotLeaderNodes(true);
DistributedLock lock = manager.getDistributedLock("a");
final boolean held;
try {
held = lock.isHeldByCaller();
} catch (UnavailableStateException expected) {
return;
}
Assert.assertFalse(held);
}
@Test
public void canGetLockWithoutQuorum() throws Exception {
Assume.assumeThat(nodes, greaterThan(1));
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
stopMajorityNotLeaderNodes(true);
DistributedLock lock = manager.getDistributedLock("a");
Assert.assertNotNull(lock);
}
@Test
public void notifiedAsUnavailableWhileLoosingQuorum() throws Exception {
Assume.assumeThat(nodes, greaterThan(1));
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock lock = manager.getDistributedLock("a");
CountDownLatch unavailable = new CountDownLatch(1);
lock.addListener(unavailable::countDown);
stopMajorityNotLeaderNodes(true);
Assert.assertTrue(unavailable.await(SESSION_MS + SERVER_TICK_MS, TimeUnit.MILLISECONDS));
}
@Test
public void beNotifiedOnce() throws Exception {
Assume.assumeThat(nodes, greaterThan(1));
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock lock = manager.getDistributedLock("a");
final AtomicInteger unavailableManager = new AtomicInteger(0);
final AtomicInteger unavailableLock = new AtomicInteger(0);
manager.addUnavailableManagerListener(unavailableManager::incrementAndGet);
lock.addListener(unavailableLock::incrementAndGet);
stopMajorityNotLeaderNodes(true);
TimeUnit.MILLISECONDS.sleep(SESSION_MS + SERVER_TICK_MS + CONNECTION_MS);
Assert.assertEquals(1, unavailableLock.get());
Assert.assertEquals(1, unavailableManager.get());
}
@Test
public void beNotifiedOfUnavailabilityWhileBlockedOnTimedLock() throws Exception {
Assume.assumeThat(nodes, greaterThan(1));
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
DistributedLock lock = manager.getDistributedLock("a");
final AtomicInteger unavailableManager = new AtomicInteger(0);
final AtomicInteger unavailableLock = new AtomicInteger(0);
manager.addUnavailableManagerListener(unavailableManager::incrementAndGet);
lock.addListener(unavailableLock::incrementAndGet);
final DistributedPrimitiveManager otherManager = createManagedDistributeManager();
otherManager.start();
Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
final CountDownLatch startedTimedLock = new CountDownLatch(1);
final AtomicReference<Boolean> unavailableTimedLock = new AtomicReference<>(null);
Thread timedLock = new Thread(() -> {
startedTimedLock.countDown();
try {
lock.tryLock(Long.MAX_VALUE, TimeUnit.DAYS);
unavailableTimedLock.set(false);
} catch (UnavailableStateException e) {
unavailableTimedLock.set(true);
} catch (InterruptedException e) {
unavailableTimedLock.set(false);
}
});
timedLock.start();
Assert.assertTrue(startedTimedLock.await(10, TimeUnit.SECONDS));
TimeUnit.SECONDS.sleep(1);
stopMajorityNotLeaderNodes(true);
TimeUnit.MILLISECONDS.sleep(SESSION_MS + CONNECTION_MS);
Wait.waitFor(() -> unavailableLock.get() > 0, SERVER_TICK_MS);
Assert.assertEquals(1, unavailableManager.get());
Assert.assertEquals(TRUE, unavailableTimedLock.get());
}
@Test
public void beNotifiedOfAlreadyUnavailableManagerAfterAddingListener() throws Exception {
DistributedPrimitiveManager manager = createManagedDistributeManager();
manager.start();
final AtomicBoolean unavailable = new AtomicBoolean(false);
DistributedPrimitiveManager.UnavailableManagerListener managerListener = () -> {
unavailable.set(true);
};
manager.addUnavailableManagerListener(managerListener);
Assert.assertFalse(unavailable.get());
stopMajorityNotLeaderNodes(true);
Wait.waitFor(unavailable::get);
manager.removeUnavailableManagerListener(managerListener);
final AtomicInteger unavailableOnRegister = new AtomicInteger();
manager.addUnavailableManagerListener(unavailableOnRegister::incrementAndGet);
Assert.assertEquals(1, unavailableOnRegister.get());
unavailableOnRegister.set(0);
try (DistributedLock lock = manager.getDistributedLock("a")) {
lock.addListener(unavailableOnRegister::incrementAndGet);
Assert.assertEquals(1, unavailableOnRegister.get());
}
}
private boolean ensembleHasLeader() {
return testingServer.getServers().stream().filter(CuratorDistributedLockTest::isLeader).count() != 0;
}
private static boolean isLeader(TestingZooKeeperServer server) {
long leaderId = server.getQuorumPeer().getLeaderId();
long id = server.getQuorumPeer().getId();
return id == leaderId;
}
private void stopMajorityNotLeaderNodes(boolean fromLast) throws Exception {
List<TestingZooKeeperServer> followers = testingServer.getServers().stream().filter(Predicates.not(CuratorDistributedLockTest::isLeader)).collect(Collectors.toList());
final int quorum = (nodes / 2) + 1;
for (int i = 0; i < quorum; i++) {
final int nodeIndex = fromLast ? (followers.size() - 1) - i : i;
followers.get(nodeIndex).stop();
}
}
private void restartMajorityNodes(boolean startFromLast) throws Exception {
final int quorum = (nodes / 2) + 1;
for (int i = 0; i < quorum; i++) {
final int nodeIndex = startFromLast ? (nodes - 1) - i : i;
if (!testingServer.restartServer(clusterSpecs[nodeIndex])) {
throw new IllegalStateException("errored while restarting " + clusterSpecs[nodeIndex]);
}
}
}
}
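As a side note, the configuration keys exercised by configureManager(...) above are all that is needed to stand up the Curator-based manager outside the test cluster. A minimal sketch, assuming a reachable ZooKeeper ensemble; the connect string, namespace and timeouts are placeholders.
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
public class CuratorManagerSketch {
   public static void main(String[] args) throws Exception {
      Map<String, String> config = new HashMap<>();
      config.put("connect-string", "zk1:2181,zk2:2181,zk3:2181"); // placeholder ensemble
      config.put("namespace", "activemq-artemis");
      config.put("session-ms", "6000");
      config.put("connection-ms", "2000");
      config.put("retries", "1");
      config.put("retries-ms", "100");
      DistributedPrimitiveManager manager = DistributedPrimitiveManager.newInstanceOf(
         "org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager", config);
      // start(timeout, unit) returns false if the ensemble cannot be reached in time
      if (!manager.start(2, TimeUnit.SECONDS)) {
         throw new IllegalStateException("cannot reach the ZooKeeper ensemble");
      }
      manager.addUnavailableManagerListener(() -> System.err.println("manager became unavailable"));
      DistributedLock lock = manager.getDistributedLock("a");
      lock.addListener(() -> System.err.println("lock became unavailable"));
      if (lock.tryLock(1, TimeUnit.SECONDS)) {
         lock.unlock();
      }
      manager.stop();
   }
}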

View File

@ -0,0 +1,140 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.quorum.zookeeper;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.test.InstanceSpec;
import org.apache.curator.test.TestingCluster;
import org.apache.curator.utils.ZKPaths;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.data.Stat;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
public class CuratorDistributedPrimitiveManagerTest {
private final ArrayList<AutoCloseable> autoCloseables = new ArrayList<>();
private static final int BASE_SERVER_PORT = 6666;
private static final int CONNECTION_MS = 2000;
// Beware: the server tick must be small enough to let the session expire correctly
private static final int SESSION_MS = 6000;
private static final int SERVER_TICK_MS = 2000;
private static final int RETRIES_MS = 100;
private static final int RETRIES = 1;
public int nodes = 1;
@Rule
public TemporaryFolder tmpFolder = new TemporaryFolder();
private TestingCluster testingServer;
private String connectString;
@Before
public void setupEnv() throws Throwable {
InstanceSpec[] clusterSpecs = new InstanceSpec[nodes];
for (int i = 0; i < nodes; i++) {
clusterSpecs[i] = new InstanceSpec(tmpFolder.newFolder(), BASE_SERVER_PORT + i, -1, -1, true, -1, SERVER_TICK_MS, -1);
}
testingServer = new TestingCluster(clusterSpecs);
testingServer.start();
connectString = testingServer.getConnectString();
}
@After
public void tearDownEnv() throws Throwable {
autoCloseables.forEach(closeables -> {
try {
closeables.close();
} catch (Throwable t) {
// intentionally ignored: best-effort cleanup on teardown
}
});
testingServer.close();
}
protected void configureManager(Map<String, String> config) {
config.put("connect-string", connectString);
config.put("session-ms", Integer.toString(SESSION_MS));
config.put("connection-ms", Integer.toString(CONNECTION_MS));
config.put("retries", Integer.toString(RETRIES));
config.put("retries-ms", Integer.toString(RETRIES_MS));
}
protected DistributedPrimitiveManager createManagedDistributeManager(Consumer<? super Map<String, String>> defaultConfiguration) {
try {
final HashMap<String, String> config = new HashMap<>();
configureManager(config);
defaultConfiguration.accept(config);
final DistributedPrimitiveManager manager = DistributedPrimitiveManager.newInstanceOf(managerClassName(), config);
autoCloseables.add(manager);
return manager;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
protected String managerClassName() {
return CuratorDistributedPrimitiveManager.class.getName();
}
@Test
public void verifyLayoutInZK() throws Exception {
final DistributedPrimitiveManager manager = createManagedDistributeManager(config -> config.put("namespace", "activemq-artemis"));
manager.start();
Assert.assertTrue(manager.getDistributedLock("journal-identity-000-111").tryLock());
Assert.assertTrue(manager.getMutableLong("journal-identity-000-111").compareAndSet(0, 1));
CuratorFramework curatorFramework = ((CuratorDistributedPrimitiveManager)manager).getCurator();
List<String> entries = new LinkedList<>();
dumpZK(curatorFramework.getZookeeperClient().getZooKeeper(), "/", entries);
Assert.assertTrue(entries.get(2).contains("activation-sequence"));
for (String entry: entries) {
System.err.println("ZK: " + entry);
}
}
private void dumpZK(ZooKeeper zooKeeper, String path, List<String> entries) throws InterruptedException, KeeperException {
List<String> children = ZKPaths.getSortedChildren(zooKeeper, path);
for (String s: children) {
if (!s.equals("zookeeper")) {
String qualifiedPath = (path.endsWith("/") ? path : path + "/") + s;
Stat stat = new Stat();
zooKeeper.getData(qualifiedPath, null, stat);
entries.add(qualifiedPath + ", data-len:" + stat.getDataLength() + ", ephemeral: " + (stat.getEphemeralOwner() != 0));
dumpZK(zooKeeper, qualifiedPath, entries);
}
}
}
}

View File

@ -85,6 +85,11 @@
<artifactId>artemis-core-client</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-quorum-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>activemq-artemis-native</artifactId>

View File

@ -22,6 +22,8 @@ import java.util.List;
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
import org.apache.activemq.artemis.api.core.ActiveMQIllegalStateException;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ColocatedPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.LiveOnlyPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
@ -31,6 +33,8 @@ import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfigur
import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.BackupPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ColocatedPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy;
@ -79,6 +83,11 @@ public final class ConfigurationUtils {
ReplicaPolicyConfiguration pc = (ReplicaPolicyConfiguration) conf;
return new ReplicaPolicy(pc.getClusterName(), pc.getMaxSavedReplicatedJournalsSize(), pc.getGroupName(), pc.isRestartBackup(), pc.isAllowFailBack(), pc.getInitialReplicationSyncTimeout(), getScaleDownPolicy(pc.getScaleDownConfiguration()), server.getNetworkHealthCheck(), pc.getVoteOnReplicationFailure(), pc.getQuorumSize(), pc.getVoteRetries(), pc.getVoteRetryWait(), pc.getQuorumVoteWait(), pc.getRetryReplicationWait());
}
case PRIMARY:
return ReplicationPrimaryPolicy.with((ReplicationPrimaryPolicyConfiguration) conf);
case BACKUP: {
return ReplicationBackupPolicy.with((ReplicationBackupPolicyConfiguration) conf);
}
case SHARED_STORE_MASTER: {
SharedStoreMasterPolicyConfiguration pc = (SharedStoreMasterPolicyConfiguration) conf;
return new SharedStoreMasterPolicy(pc.isFailoverOnServerShutdown(), pc.isWaitForActivation());

View File

@ -26,7 +26,9 @@ public interface HAPolicyConfiguration extends Serializable {
REPLICA("Replica"),
SHARED_STORE_MASTER("Shared Store Master"),
SHARED_STORE_SLAVE("Shared Store Slave"),
COLOCATED("Colocated");
COLOCATED("Colocated"),
PRIMARY("Primary"),
BACKUP("Backup");
private String name;

View File

@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.config.ha;
import java.io.Serializable;
import java.util.Map;
public class DistributedPrimitiveManagerConfiguration implements Serializable {
private final String className;
private final Map<String, String> properties;
public DistributedPrimitiveManagerConfiguration(String className, Map<String, String> properties) {
this.className = className;
this.properties = properties;
}
public Map<String, String> getProperties() {
return properties;
}
public String getClassName() {
return className;
}
}
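For clarity (not part of the commit), this is how the class-name/properties pair carried by this configuration ends up being materialized: the replication policies below hand both values to DistributedPrimitiveManager.newInstanceOf. A small sketch, with a placeholder connect string.
import java.util.Collections;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
public class ManagerFromConfigurationSketch {
   public static DistributedPrimitiveManager create() throws Exception {
      DistributedPrimitiveManagerConfiguration configuration = new DistributedPrimitiveManagerConfiguration(
         "org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager",
         Collections.singletonMap("connect-string", "zk1:2181")); // placeholder
      // same call the replication policies use to build the manager reflectively
      return DistributedPrimitiveManager.newInstanceOf(configuration.getClassName(), configuration.getProperties());
   }
}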

View File

@ -0,0 +1,140 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.config.ha;
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
public class ReplicationBackupPolicyConfiguration implements HAPolicyConfiguration {
private String clusterName = null;
private int maxSavedReplicatedJournalsSize = ActiveMQDefaultConfiguration.getDefaultMaxSavedReplicatedJournalsSize();
private String groupName = null;
/*
* used in the replicated policy after failover
*/
private boolean allowFailBack = false;
private long initialReplicationSyncTimeout = ActiveMQDefaultConfiguration.getDefaultInitialReplicationSyncTimeout();
private int voteRetries = ActiveMQDefaultConfiguration.getDefaultVoteRetries();
/**
* TODO: move it into {@link ActiveMQDefaultConfiguration} when the configuration is stable.
*/
private long voteRetryWait = 2000;
private long retryReplicationWait = ActiveMQDefaultConfiguration.getDefaultRetryReplicationWait();
private DistributedPrimitiveManagerConfiguration distributedManagerConfiguration = null;
public static final ReplicationBackupPolicyConfiguration withDefault() {
return new ReplicationBackupPolicyConfiguration();
}
private ReplicationBackupPolicyConfiguration() {
}
@Override
public HAPolicyConfiguration.TYPE getType() {
return TYPE.BACKUP;
}
public String getClusterName() {
return clusterName;
}
public ReplicationBackupPolicyConfiguration setClusterName(String clusterName) {
this.clusterName = clusterName;
return this;
}
public int getMaxSavedReplicatedJournalsSize() {
return maxSavedReplicatedJournalsSize;
}
public ReplicationBackupPolicyConfiguration setMaxSavedReplicatedJournalsSize(int maxSavedReplicatedJournalsSize) {
this.maxSavedReplicatedJournalsSize = maxSavedReplicatedJournalsSize;
return this;
}
public String getGroupName() {
return groupName;
}
public ReplicationBackupPolicyConfiguration setGroupName(String groupName) {
this.groupName = groupName;
return this;
}
public boolean isAllowFailBack() {
return allowFailBack;
}
public ReplicationBackupPolicyConfiguration setAllowFailBack(boolean allowFailBack) {
this.allowFailBack = allowFailBack;
return this;
}
public long getInitialReplicationSyncTimeout() {
return initialReplicationSyncTimeout;
}
public ReplicationBackupPolicyConfiguration setInitialReplicationSyncTimeout(long initialReplicationSyncTimeout) {
this.initialReplicationSyncTimeout = initialReplicationSyncTimeout;
return this;
}
public int getVoteRetries() {
return voteRetries;
}
public ReplicationBackupPolicyConfiguration setVoteRetries(int voteRetries) {
this.voteRetries = voteRetries;
return this;
}
public ReplicationBackupPolicyConfiguration setVoteRetryWait(long voteRetryWait) {
this.voteRetryWait = voteRetryWait;
return this;
}
public long getVoteRetryWait() {
return voteRetryWait;
}
public long getRetryReplicationWait() {
return retryReplicationWait;
}
public ReplicationBackupPolicyConfiguration setRetryReplicationWait(long retryReplicationWait) {
this.retryReplicationWait = retryReplicationWait;
return this;
}
public ReplicationBackupPolicyConfiguration setDistributedManagerConfiguration(DistributedPrimitiveManagerConfiguration configuration) {
this.distributedManagerConfiguration = configuration;
return this;
}
public DistributedPrimitiveManagerConfiguration getDistributedManagerConfiguration() {
return distributedManagerConfiguration;
}
}
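A short sketch (not part of the commit) of the fluent style this configuration supports, roughly what createReplicationBackupHaPolicy later in this commit builds from the backup XML element; cluster and group names are placeholders.
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
public class BackupPolicyConfigurationSketch {
   public static HAPolicyConfiguration backupPolicy(DistributedPrimitiveManagerConfiguration managerConfiguration) {
      return ReplicationBackupPolicyConfiguration.withDefault()
         .setClusterName("my-cluster")   // placeholder
         .setGroupName("my-group")       // placeholder
         .setAllowFailBack(true)
         .setVoteRetries(-1)             // the parser accepts -1 or >= 0 here
         .setVoteRetryWait(2000)
         .setRetryReplicationWait(2000)
         .setDistributedManagerConfiguration(managerConfiguration);
   }
}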

View File

@ -0,0 +1,125 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.config.ha;
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
public class ReplicationPrimaryPolicyConfiguration implements HAPolicyConfiguration {
private boolean checkForLiveServer = ActiveMQDefaultConfiguration.isDefaultCheckForLiveServer();
private String groupName = null;
private String clusterName = null;
private long initialReplicationSyncTimeout = ActiveMQDefaultConfiguration.getDefaultInitialReplicationSyncTimeout();
private int voteRetries = ActiveMQDefaultConfiguration.getDefaultVoteRetries();
/**
* TODO: move it into {@link ActiveMQDefaultConfiguration} when the configuration is stable.
*/
private long voteRetryWait = 2000;
private Long retryReplicationWait = ActiveMQDefaultConfiguration.getDefaultRetryReplicationWait();
private DistributedPrimitiveManagerConfiguration distributedManagerConfiguration = null;
public static ReplicationPrimaryPolicyConfiguration withDefault() {
return new ReplicationPrimaryPolicyConfiguration();
}
private ReplicationPrimaryPolicyConfiguration() {
}
@Override
public TYPE getType() {
return TYPE.PRIMARY;
}
public boolean isCheckForLiveServer() {
return checkForLiveServer;
}
public ReplicationPrimaryPolicyConfiguration setCheckForLiveServer(boolean checkForLiveServer) {
this.checkForLiveServer = checkForLiveServer;
return this;
}
public String getGroupName() {
return groupName;
}
public ReplicationPrimaryPolicyConfiguration setGroupName(String groupName) {
this.groupName = groupName;
return this;
}
public String getClusterName() {
return clusterName;
}
public ReplicationPrimaryPolicyConfiguration setClusterName(String clusterName) {
this.clusterName = clusterName;
return this;
}
public long getInitialReplicationSyncTimeout() {
return initialReplicationSyncTimeout;
}
public ReplicationPrimaryPolicyConfiguration setInitialReplicationSyncTimeout(long initialReplicationSyncTimeout) {
this.initialReplicationSyncTimeout = initialReplicationSyncTimeout;
return this;
}
public int getVoteRetries() {
return voteRetries;
}
public ReplicationPrimaryPolicyConfiguration setVoteRetries(int voteRetries) {
this.voteRetries = voteRetries;
return this;
}
public ReplicationPrimaryPolicyConfiguration setVoteRetryWait(long voteRetryWait) {
this.voteRetryWait = voteRetryWait;
return this;
}
public long getVoteRetryWait() {
return voteRetryWait;
}
public void setRetryReplicationWait(Long retryReplicationWait) {
this.retryReplicationWait = retryReplicationWait;
}
public Long getRetryReplicationWait() {
return retryReplicationWait;
}
public ReplicationPrimaryPolicyConfiguration setDistributedManagerConfiguration(DistributedPrimitiveManagerConfiguration configuration) {
this.distributedManagerConfiguration = configuration;
return this;
}
public DistributedPrimitiveManagerConfiguration getDistributedManagerConfiguration() {
return distributedManagerConfiguration;
}
}

View File

@ -69,7 +69,10 @@ import org.apache.activemq.artemis.core.config.federation.FederationQueuePolicyC
import org.apache.activemq.artemis.core.config.federation.FederationStreamConfiguration;
import org.apache.activemq.artemis.core.config.federation.FederationTransformerConfiguration;
import org.apache.activemq.artemis.core.config.federation.FederationUpstreamConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ColocatedPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.LiveOnlyPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
@ -1607,6 +1610,16 @@ public final class FileConfigurationParser extends XMLConfigurationUtil {
Element colocatedNode = (Element) colocatedNodeList.item(0);
mainConfig.setHAPolicyConfiguration(createColocatedHaPolicy(colocatedNode, true));
}
NodeList primaryNodeList = e.getElementsByTagName("primary");
if (primaryNodeList.getLength() > 0) {
Element primaryNode = (Element) primaryNodeList.item(0);
mainConfig.setHAPolicyConfiguration(createReplicationPrimaryHaPolicy(primaryNode, mainConfig));
}
NodeList backupNodeList = e.getElementsByTagName("backup");
if (backupNodeList.getLength() > 0) {
Element backupNode = (Element) backupNodeList.item(0);
mainConfig.setHAPolicyConfiguration(createReplicationBackupHaPolicy(backupNode, mainConfig));
}
} else if (haNode.getTagName().equals("shared-store")) {
NodeList masterNodeList = e.getElementsByTagName("master");
if (masterNodeList.getLength() > 0) {
@ -1699,6 +1712,75 @@ public final class FileConfigurationParser extends XMLConfigurationUtil {
return configuration;
}
private ReplicationPrimaryPolicyConfiguration createReplicationPrimaryHaPolicy(Element policyNode, Configuration config) {
ReplicationPrimaryPolicyConfiguration configuration = ReplicationPrimaryPolicyConfiguration.withDefault();
configuration.setCheckForLiveServer(getBoolean(policyNode, "check-for-live-server", configuration.isCheckForLiveServer()));
configuration.setGroupName(getString(policyNode, "group-name", configuration.getGroupName(), Validators.NO_CHECK));
configuration.setClusterName(getString(policyNode, "cluster-name", configuration.getClusterName(), Validators.NO_CHECK));
configuration.setInitialReplicationSyncTimeout(getLong(policyNode, "initial-replication-sync-timeout", configuration.getInitialReplicationSyncTimeout(), Validators.GT_ZERO));
configuration.setVoteRetries(getInteger(policyNode, "vote-retries", configuration.getVoteRetries(), Validators.MINUS_ONE_OR_GE_ZERO));
configuration.setVoteRetryWait(getLong(policyNode, "vote-retry-wait", configuration.getVoteRetryWait(), Validators.GT_ZERO));
configuration.setRetryReplicationWait(getLong(policyNode, "retry-replication-wait", configuration.getRetryReplicationWait(), Validators.GT_ZERO));
configuration.setDistributedManagerConfiguration(createDistributedPrimitiveManagerConfiguration(policyNode, config));
return configuration;
}
private ReplicationBackupPolicyConfiguration createReplicationBackupHaPolicy(Element policyNode, Configuration config) {
ReplicationBackupPolicyConfiguration configuration = ReplicationBackupPolicyConfiguration.withDefault();
configuration.setGroupName(getString(policyNode, "group-name", configuration.getGroupName(), Validators.NO_CHECK));
configuration.setAllowFailBack(getBoolean(policyNode, "allow-failback", configuration.isAllowFailBack()));
configuration.setInitialReplicationSyncTimeout(getLong(policyNode, "initial-replication-sync-timeout", configuration.getInitialReplicationSyncTimeout(), Validators.GT_ZERO));
configuration.setClusterName(getString(policyNode, "cluster-name", configuration.getClusterName(), Validators.NO_CHECK));
configuration.setMaxSavedReplicatedJournalsSize(getInteger(policyNode, "max-saved-replicated-journals-size", configuration.getMaxSavedReplicatedJournalsSize(), Validators.MINUS_ONE_OR_GE_ZERO));
configuration.setVoteRetries(getInteger(policyNode, "vote-retries", configuration.getVoteRetries(), Validators.MINUS_ONE_OR_GE_ZERO));
configuration.setVoteRetryWait(getLong(policyNode, "vote-retry-wait", configuration.getVoteRetryWait(), Validators.GT_ZERO));
configuration.setRetryReplicationWait(getLong(policyNode, "retry-replication-wait", configuration.getRetryReplicationWait(), Validators.GT_ZERO));
configuration.setDistributedManagerConfiguration(createDistributedPrimitiveManagerConfiguration(policyNode, config));
return configuration;
}
private DistributedPrimitiveManagerConfiguration createDistributedPrimitiveManagerConfiguration(Element policyNode, Configuration config) {
final Element managerNode = (Element) policyNode.getElementsByTagName("manager").item(0);
final String className = getString(managerNode, "class-name",
ActiveMQDefaultConfiguration.getDefaultDistributedPrimitiveManagerClassName(),
Validators.NO_CHECK);
final Map<String, String> properties;
if (parameterExists(managerNode, "properties")) {
final NodeList propertyNodeList = managerNode.getElementsByTagName("property");
final int propertiesCount = propertyNodeList.getLength();
properties = new HashMap<>(propertiesCount);
for (int i = 0; i < propertiesCount; i++) {
final Element propertyNode = (Element) propertyNodeList.item(i);
final String propertyName = propertyNode.getAttributeNode("key").getValue();
final String propertyValue = propertyNode.getAttributeNode("value").getValue();
properties.put(propertyName, propertyValue);
}
} else {
properties = new HashMap<>(1);
}
return new DistributedPrimitiveManagerConfiguration(className, properties);
}
private SharedStoreMasterPolicyConfiguration createSharedStoreMasterHaPolicy(Element policyNode) {
SharedStoreMasterPolicyConfiguration configuration = new SharedStoreMasterPolicyConfiguration();

View File

@ -37,7 +37,6 @@ import org.apache.activemq.artemis.api.core.Interceptor;
import org.apache.activemq.artemis.api.core.Message;
import org.apache.activemq.artemis.api.core.SimpleString;
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.io.IOCriticalErrorListener;
import org.apache.activemq.artemis.core.io.SequentialFile;
import org.apache.activemq.artemis.core.journal.EncoderPersister;
import org.apache.activemq.artemis.core.journal.Journal;
@ -82,9 +81,8 @@ import org.apache.activemq.artemis.core.replication.ReplicationManager.ADD_OPERA
import org.apache.activemq.artemis.core.server.ActiveMQComponent;
import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum;
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
import org.apache.activemq.artemis.utils.actors.OrderedExecutorFactory;
import org.jboss.logging.Logger;
@ -94,12 +92,20 @@ import org.jboss.logging.Logger;
*/
public final class ReplicationEndpoint implements ChannelHandler, ActiveMQComponent {
public interface ReplicationEndpointEventListener {
void onRemoteBackupUpToDate();
void onLiveStopping(ReplicationLiveIsStoppingMessage.LiveStopping message) throws ActiveMQException;
void onLiveNodeId(String nodeId);
}
private static final Logger logger = Logger.getLogger(ReplicationEndpoint.class);
private final IOCriticalErrorListener criticalErrorListener;
private final ActiveMQServerImpl server;
private final boolean wantedFailBack;
private final SharedNothingBackupActivation activation;
private final ReplicationEndpointEventListener eventListener;
private final boolean noSync = false;
private Channel channel;
private boolean supportResponseBatching;
@ -129,8 +135,6 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
private boolean deletePages = true;
private volatile boolean started;
private SharedNothingBackupQuorum backupQuorum;
private Executor executor;
private List<Interceptor> outgoingInterceptors = null;
@ -140,13 +144,11 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
// Constructors --------------------------------------------------
public ReplicationEndpoint(final ActiveMQServerImpl server,
IOCriticalErrorListener criticalErrorListener,
boolean wantedFailBack,
SharedNothingBackupActivation activation) {
ReplicationEndpointEventListener eventListener) {
this.server = server;
this.criticalErrorListener = criticalErrorListener;
this.wantedFailBack = wantedFailBack;
this.activation = activation;
this.eventListener = eventListener;
this.pendingPackets = new ArrayDeque<>();
this.supportResponseBatching = false;
}
@ -287,7 +289,7 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
* @throws ActiveMQException
*/
private void handleLiveStopping(ReplicationLiveIsStoppingMessage packet) throws ActiveMQException {
activation.remoteFailOver(packet.isFinalMessage());
eventListener.onLiveStopping(packet.isFinalMessage());
}
@Override
@ -474,8 +476,8 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
}
journalsHolder = null;
backupQuorum.liveIDSet(liveID);
activation.setRemoteBackupUpToDate();
eventListener.onLiveNodeId(liveID);
eventListener.onRemoteBackupUpToDate();
if (logger.isTraceEnabled()) {
logger.trace("Backup is synchronized / BACKUP-SYNC-DONE");
@ -597,7 +599,7 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
if (packet.getNodeID() != null) {
// At the start of replication, we still do not know the nodeID that the live uses.
// This is the point where the backup gets this information.
backupQuorum.liveIDSet(packet.getNodeID());
eventListener.onLiveNodeId(packet.getNodeID());
}
break;
@ -900,16 +902,6 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
}
}
/**
* Sets the quorumManager used by the server in the replicationEndpoint. It is used to inform the
* backup server of the live's nodeID.
*
* @param backupQuorum
*/
public void setBackupQuorum(SharedNothingBackupQuorum backupQuorum) {
this.backupQuorum = backupQuorum;
}
/**
* @param executor2
*/

View File

@ -40,7 +40,6 @@ import org.apache.activemq.artemis.core.persistence.OperationContext;
import org.apache.activemq.artemis.core.persistence.StorageManager;
import org.apache.activemq.artemis.core.postoffice.PostOffice;
import org.apache.activemq.artemis.core.remoting.server.RemotingService;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.replication.ReplicationManager;
import org.apache.activemq.artemis.core.security.Role;
import org.apache.activemq.artemis.core.security.SecurityAuth;
@ -166,11 +165,6 @@ public interface ActiveMQServer extends ServiceComponent {
CriticalAnalyzer getCriticalAnalyzer();
/**
* @return
*/
ReplicationEndpoint getReplicationEndpoint();
/**
* it will release hold a lock for the activation.
*/

View File

@ -21,7 +21,6 @@ import org.apache.activemq.artemis.api.core.Pair;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.api.core.client.ClusterTopologyListener;
import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal;
import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum;
/**
* A class that will locate a particular live server running in a cluster. How this live is chosen
@ -31,16 +30,23 @@ import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBacku
*/
public abstract class LiveNodeLocator implements ClusterTopologyListener {
private SharedNothingBackupQuorum backupQuorum;
@FunctionalInterface
public interface BackupRegistrationListener {
public LiveNodeLocator(SharedNothingBackupQuorum backupQuorum) {
this.backupQuorum = backupQuorum;
void onBackupRegistrationFailed(boolean alreadyReplicating);
}
private final BackupRegistrationListener backupRegistrationListener;
public LiveNodeLocator(BackupRegistrationListener backupRegistrationListener) {
this.backupRegistrationListener = backupRegistrationListener;
}
/**
* Use this constructor when the LiveNodeLocator is used for scaling down rather than replicating
*/
public LiveNodeLocator() {
this(null);
}
/**
@ -67,12 +73,8 @@ public abstract class LiveNodeLocator implements ClusterTopologyListener {
* tells the locator that the current connector has failed.
*/
public void notifyRegistrationFailed(boolean alreadyReplicating) {
if (backupQuorum != null) {
if (alreadyReplicating) {
backupQuorum.notifyAlreadyReplicating();
} else {
backupQuorum.notifyRegistrationFailed();
}
if (backupRegistrationListener != null) {
backupRegistrationListener.onBackupRegistrationFailed(alreadyReplicating);
}
}

View File

@ -79,6 +79,16 @@ public abstract class NodeManager implements ActiveMQComponent {
}
}
public long readDataVersion() throws NodeManagerException {
// TODO make it abstract
throw new UnsupportedOperationException("TODO");
}
public void writeDataVersion(long version) throws NodeManagerException {
// TODO make it abstract
throw new UnsupportedOperationException("TODO");
}
public abstract SimpleString readNodeId() throws NodeManagerException;
public UUID getUUID() {

View File

@ -80,10 +80,16 @@ public class ClusterController implements ActiveMQComponent {
private boolean started;
private SimpleString replicatedClusterName;
public ClusterController(ActiveMQServer server, ScheduledExecutorService scheduledExecutor) {
public ClusterController(ActiveMQServer server,
ScheduledExecutorService scheduledExecutor,
boolean useQuorumManager) {
this.server = server;
executor = server.getExecutorFactory().getExecutor();
quorumManager = new QuorumManager(scheduledExecutor, this);
quorumManager = useQuorumManager ? new QuorumManager(scheduledExecutor, this) : null;
}
public ClusterController(ActiveMQServer server, ScheduledExecutorService scheduledExecutor) {
this(server, scheduledExecutor, true);
}
@Override
@ -108,11 +114,11 @@ public class ClusterController implements ActiveMQComponent {
//latch so we know once we are connected
replicationClusterConnectedLatch = new CountDownLatch(1);
//and add the quorum manager as a topology listener
if (defaultLocator != null) {
defaultLocator.addClusterTopologyListener(quorumManager);
}
if (quorumManager != null) {
if (defaultLocator != null) {
defaultLocator.addClusterTopologyListener(quorumManager);
}
//start the quorum manager
quorumManager.start();
}
@ -126,6 +132,26 @@ public class ClusterController implements ActiveMQComponent {
}
}
/**
* It adds {@code clusterTopologyListener} to {@code defaultLocator}.
*/
public void addClusterTopologyListener(ClusterTopologyListener clusterTopologyListener) {
if (!this.started || defaultLocator == null) {
throw new IllegalStateException("the controller must be started and have an initialized locator");
}
this.defaultLocator.addClusterTopologyListener(clusterTopologyListener);
}
/**
* It removes {@code clusterTopologyListener} from {@code defaultLocator}.
*/
public void removeClusterTopologyListener(ClusterTopologyListener clusterTopologyListener) {
if (!this.started || defaultLocator == null) {
throw new IllegalStateException("the controller must be started and have an initialized locator");
}
this.defaultLocator.removeClusterTopologyListener(clusterTopologyListener);
}
@Override
public void stop() throws Exception {
if (logger.isDebugEnabled()) {
@ -138,7 +164,9 @@ public class ClusterController implements ActiveMQComponent {
serverLocatorInternal.close();
}
//stop the quorum manager
quorumManager.stop();
if (quorumManager != null) {
quorumManager.stop();
}
}
@Override
@ -223,6 +251,17 @@ public class ClusterController implements ActiveMQComponent {
}
}
/**
* remove a cluster listener
*
* @param listener
*/
public void removeClusterTopologyListenerForReplication(ClusterTopologyListener listener) {
if (replicationLocator != null) {
replicationLocator.removeClusterTopologyListener(listener);
}
}
/**
* add an interceptor
*
@ -232,6 +271,15 @@ public class ClusterController implements ActiveMQComponent {
replicationLocator.addIncomingInterceptor(interceptor);
}
/**
* remove an interceptor
*
* @param interceptor
*/
public void removeIncomingInterceptorForReplication(Interceptor interceptor) {
replicationLocator.removeIncomingInterceptor(interceptor);
}
/**
* connect to a specific node in the cluster used for replication
*
@ -406,7 +454,11 @@ public class ClusterController implements ActiveMQComponent {
logger.debug("there is no acceptor used configured at the CoreProtocolManager " + this);
}
} else if (packet.getType() == PacketImpl.QUORUM_VOTE) {
quorumManager.handleQuorumVote(clusterChannel, packet);
if (quorumManager != null) {
quorumManager.handleQuorumVote(clusterChannel, packet);
} else {
logger.warnf("Received %s on a cluster connection that's using the new quorum vote algorithm.", packet);
}
} else if (packet.getType() == PacketImpl.SCALEDOWN_ANNOUNCEMENT) {
ScaleDownAnnounceMessage message = (ScaleDownAnnounceMessage) packet;
//we don't really need to check as it should always be true

View File

@ -157,7 +157,7 @@ public class ClusterManager implements ActiveMQComponent {
final ManagementService managementService,
final Configuration configuration,
final NodeManager nodeManager,
final boolean backup) {
final boolean useQuorumManager) {
this.executorFactory = executorFactory;
executor = executorFactory.getExecutor();
@ -174,7 +174,7 @@ public class ClusterManager implements ActiveMQComponent {
this.nodeManager = nodeManager;
clusterController = new ClusterController(server, scheduledExecutor);
clusterController = new ClusterController(server, scheduledExecutor, useQuorumManager);
haManager = server.getActivation().getHAManager();
}

View File

@ -57,4 +57,8 @@ public interface HAPolicy<T extends Activation> {
String getScaleDownClustername();
default boolean useQuorumManager() {
return true;
}
}

View File

@ -0,0 +1,176 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.server.cluster.ha;
import java.util.Map;
import java.util.Objects;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.io.IOCriticalErrorListener;
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
public class ReplicationBackupPolicy implements HAPolicy<ReplicationBackupActivation> {
private final ReplicationPrimaryPolicy livePolicy;
private final String groupName;
private final String clusterName;
private final int maxSavedReplicatedJournalsSize;
private final int voteRetries;
private final long voteRetryWait;
private final long retryReplicationWait;
private final DistributedPrimitiveManagerConfiguration distributedManagerConfiguration;
private final boolean tryFailback;
private ReplicationBackupPolicy(ReplicationBackupPolicyConfiguration configuration,
ReplicationPrimaryPolicy livePolicy) {
Objects.requireNonNull(livePolicy);
this.clusterName = configuration.getClusterName();
this.maxSavedReplicatedJournalsSize = configuration.getMaxSavedReplicatedJournalsSize();
this.groupName = configuration.getGroupName();
this.voteRetries = configuration.getVoteRetries();
this.voteRetryWait = configuration.getVoteRetryWait();
this.retryReplicationWait = configuration.getRetryReplicationWait();
this.distributedManagerConfiguration = configuration.getDistributedManagerConfiguration();
this.tryFailback = true;
this.livePolicy = livePolicy;
}
private ReplicationBackupPolicy(ReplicationBackupPolicyConfiguration configuration) {
this.clusterName = configuration.getClusterName();
this.maxSavedReplicatedJournalsSize = configuration.getMaxSavedReplicatedJournalsSize();
this.groupName = configuration.getGroupName();
this.voteRetries = configuration.getVoteRetries();
this.voteRetryWait = configuration.getVoteRetryWait();
this.retryReplicationWait = configuration.getRetryReplicationWait();
this.distributedManagerConfiguration = configuration.getDistributedManagerConfiguration();
this.tryFailback = false;
livePolicy = ReplicationPrimaryPolicy.failoverPolicy(
configuration.getInitialReplicationSyncTimeout(),
configuration.getGroupName(),
configuration.getClusterName(),
this,
configuration.isAllowFailBack(),
configuration.getDistributedManagerConfiguration());
}
public boolean isTryFailback() {
return tryFailback;
}
/**
* Creates a policy whose live policy won't cause the broker to try failback.
*/
public static ReplicationBackupPolicy with(ReplicationBackupPolicyConfiguration configuration) {
return new ReplicationBackupPolicy(configuration);
}
/**
* Creates a companion backup policy for a natural-born primary: it causes the broker to try failback.
*/
static ReplicationBackupPolicy failback(int voteRetries,
long voteRetryWait,
long retryReplicationWait,
String clusterName,
String groupName,
ReplicationPrimaryPolicy livePolicy,
DistributedPrimitiveManagerConfiguration distributedManagerConfiguration) {
return new ReplicationBackupPolicy(ReplicationBackupPolicyConfiguration.withDefault()
.setVoteRetries(voteRetries)
.setVoteRetryWait(voteRetryWait)
.setRetryReplicationWait(retryReplicationWait)
.setClusterName(clusterName)
.setGroupName(groupName)
.setDistributedManagerConfiguration(distributedManagerConfiguration),
livePolicy);
}
@Override
public ReplicationBackupActivation createActivation(ActiveMQServerImpl server,
boolean wasLive,
Map<String, Object> activationParams,
IOCriticalErrorListener shutdownOnCriticalIO) throws Exception {
return new ReplicationBackupActivation(server, wasLive, DistributedPrimitiveManager.newInstanceOf(
distributedManagerConfiguration.getClassName(),
distributedManagerConfiguration.getProperties()), this);
}
@Override
public boolean isSharedStore() {
return false;
}
@Override
public boolean isBackup() {
return true;
}
@Override
public boolean canScaleDown() {
return false;
}
@Override
public String getScaleDownGroupName() {
return null;
}
@Override
public String getScaleDownClustername() {
return null;
}
public String getClusterName() {
return clusterName;
}
@Override
public String getBackupGroupName() {
return groupName;
}
public String getGroupName() {
return groupName;
}
public ReplicationPrimaryPolicy getLivePolicy() {
return livePolicy;
}
public int getMaxSavedReplicatedJournalsSize() {
return maxSavedReplicatedJournalsSize;
}
public int getVoteRetries() {
return voteRetries;
}
public long getVoteRetryWait() {
return voteRetryWait;
}
public long getRetryReplicationWait() {
return retryReplicationWait;
}
@Override
public boolean useQuorumManager() {
return false;
}
}
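A minimal usage sketch for the factory above (not part of this commit; the cluster/group names and retry values are illustrative, and the DistributedPrimitiveManagerConfiguration instance is assumed to be built elsewhere, e.g. by the broker configuration parser):
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
final class ReplicationBackupPolicyExample {
   static ReplicationBackupPolicy naturalBornBackup(DistributedPrimitiveManagerConfiguration managerConfiguration) {
      // mirrors what the broker configuration would produce for a natural-born backup
      ReplicationBackupPolicyConfiguration configuration = ReplicationBackupPolicyConfiguration.withDefault()
         .setClusterName("my-cluster")
         .setGroupName("my-group")
         .setVoteRetries(12)
         .setVoteRetryWait(2000)
         .setRetryReplicationWait(2000)
         .setDistributedManagerConfiguration(managerConfiguration);
      ReplicationBackupPolicy policy = ReplicationBackupPolicy.with(configuration);
      assert !policy.isTryFailback();     // natural-born backup: it won't try to fail back on its own
      assert !policy.useQuorumManager();  // the pluggable quorum replaces the classic quorum manager
      return policy;
   }
}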

View File

@ -0,0 +1,166 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.server.cluster.ha;
import java.util.Map;
import java.util.Objects;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.io.IOCriticalErrorListener;
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.ReplicationPrimaryActivation;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
public class ReplicationPrimaryPolicy implements HAPolicy<ReplicationPrimaryActivation> {
private final ReplicationBackupPolicy backupPolicy;
private final String clusterName;
private final String groupName;
private final boolean checkForLiveServer;
private final long initialReplicationSyncTimeout;
private final DistributedPrimitiveManagerConfiguration distributedManagerConfiguration;
private final boolean allowAutoFailBack;
private ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration configuration,
ReplicationBackupPolicy backupPolicy,
boolean allowAutoFailBack) {
Objects.requireNonNull(backupPolicy);
clusterName = configuration.getClusterName();
groupName = configuration.getGroupName();
checkForLiveServer = configuration.isCheckForLiveServer();
initialReplicationSyncTimeout = configuration.getInitialReplicationSyncTimeout();
distributedManagerConfiguration = configuration.getDistributedManagerConfiguration();
this.allowAutoFailBack = allowAutoFailBack;
this.backupPolicy = backupPolicy;
}
private ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration config) {
clusterName = config.getClusterName();
groupName = config.getGroupName();
checkForLiveServer = config.isCheckForLiveServer();
initialReplicationSyncTimeout = config.getInitialReplicationSyncTimeout();
distributedManagerConfiguration = config.getDistributedManagerConfiguration();
this.allowAutoFailBack = false;
backupPolicy = ReplicationBackupPolicy.failback(config.getVoteRetries(), config.getVoteRetryWait(),
config.getRetryReplicationWait(), config.getClusterName(),
config.getGroupName(), this,
config.getDistributedManagerConfiguration());
}
/**
* It creates a companion failing-over primary policy for a natural-born backup: it allows auto fail-back
* only if configured to do so.
*/
static ReplicationPrimaryPolicy failoverPolicy(long initialReplicationSyncTimeout,
String groupName,
String clusterName,
ReplicationBackupPolicy replicaPolicy,
boolean allowAutoFailback,
DistributedPrimitiveManagerConfiguration distributedManagerConfiguration) {
return new ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration.withDefault()
.setCheckForLiveServer(false)
.setInitialReplicationSyncTimeout(initialReplicationSyncTimeout)
.setGroupName(groupName)
.setClusterName(clusterName)
.setDistributedManagerConfiguration(distributedManagerConfiguration),
replicaPolicy, allowAutoFailback);
}
/**
* It creates a primary policy that never allows auto fail-back.<br>
* It's meant to be used for natural-born primary brokers: its backup policy is set to always try to fail back.
*/
public static ReplicationPrimaryPolicy with(ReplicationPrimaryPolicyConfiguration configuration) {
return new ReplicationPrimaryPolicy(configuration);
}
public ReplicationBackupPolicy getBackupPolicy() {
return backupPolicy;
}
@Override
public ReplicationPrimaryActivation createActivation(ActiveMQServerImpl server,
boolean wasLive,
Map<String, Object> activationParams,
IOCriticalErrorListener shutdownOnCriticalIO) throws Exception {
return new ReplicationPrimaryActivation(server,
DistributedPrimitiveManager.newInstanceOf(
distributedManagerConfiguration.getClassName(),
distributedManagerConfiguration.getProperties()), this);
}
@Override
public boolean isSharedStore() {
return false;
}
@Override
public boolean isBackup() {
return false;
}
@Override
public boolean isWaitForActivation() {
return true;
}
@Override
public boolean canScaleDown() {
return false;
}
@Override
public String getBackupGroupName() {
return groupName;
}
@Override
public String getScaleDownGroupName() {
return null;
}
@Override
public String getScaleDownClustername() {
return null;
}
public boolean isCheckForLiveServer() {
return checkForLiveServer;
}
public boolean isAllowAutoFailBack() {
return allowAutoFailBack;
}
public String getClusterName() {
return clusterName;
}
public long getInitialReplicationSyncTimeout() {
return initialReplicationSyncTimeout;
}
public String getGroupName() {
return groupName;
}
@Override
public boolean useQuorumManager() {
return false;
}
}
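A companion sketch (again not part of this commit; names and values are illustrative): it builds a natural-born primary policy and checks the fail-back backup policy it derives through the package-private failback(...) factory shown earlier.
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
final class ReplicationPrimaryPolicyExample {
   static ReplicationPrimaryPolicy naturalBornPrimary(DistributedPrimitiveManagerConfiguration managerConfiguration) {
      ReplicationPrimaryPolicyConfiguration configuration = ReplicationPrimaryPolicyConfiguration.withDefault()
         .setClusterName("my-cluster")
         .setGroupName("my-group")
         .setInitialReplicationSyncTimeout(30_000)
         .setDistributedManagerConfiguration(managerConfiguration);
      ReplicationPrimaryPolicy policy = ReplicationPrimaryPolicy.with(configuration);
      // a natural-born primary never allows auto fail-back, but its companion backup policy
      // always tries to fail back once this broker has to act as a backup
      assert !policy.isAllowAutoFailBack();
      assert policy.getBackupPolicy().isTryFailback();
      return policy;
   }
}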

View File

@ -28,11 +28,12 @@ import org.apache.activemq.artemis.core.client.impl.Topology;
import org.apache.activemq.artemis.core.protocol.core.CoreRemotingConnection;
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
import org.apache.activemq.artemis.core.server.LiveNodeLocator.BackupRegistrationListener;
import org.apache.activemq.artemis.core.server.NetworkHealthCheck;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.jboss.logging.Logger;
public class SharedNothingBackupQuorum implements Quorum, SessionFailureListener {
public class SharedNothingBackupQuorum implements Quorum, SessionFailureListener, BackupRegistrationListener {
private static final Logger LOGGER = Logger.getLogger(SharedNothingBackupQuorum.class);
@ -236,13 +237,9 @@ public class SharedNothingBackupQuorum implements Quorum, SessionFailureListener
}
}
public void notifyRegistrationFailed() {
signal = BACKUP_ACTIVATION.FAILURE_REPLICATING;
latch.countDown();
}
public void notifyAlreadyReplicating() {
signal = BACKUP_ACTIVATION.ALREADY_REPLICATING;
@Override
public void onBackupRegistrationFailed(boolean alreadyReplicating) {
signal = alreadyReplicating ? BACKUP_ACTIVATION.ALREADY_REPLICATING : BACKUP_ACTIVATION.FAILURE_REPLICATING;
latch.countDown();
}
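The two legacy callbacks above are collapsed into the single BackupRegistrationListener callback, so registration-failure handling can now be supplied as a lambda wherever a full quorum object isn't needed. A small hedged sketch (class name and messages are illustrative only):
import org.apache.activemq.artemis.core.server.LiveNodeLocator.BackupRegistrationListener;
final class RegistrationFailureLogging {
   static BackupRegistrationListener loggingListener() {
      // the boolean distinguishes ALREADY_REPLICATING from FAILURE_REPLICATING
      return alreadyReplicating -> System.out.println(alreadyReplicating
         ? "live is already replicating to another backup"
         : "backup registration failed");
   }
}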

View File

@ -36,6 +36,7 @@ public class FileMoveManager {
private static final Logger logger = Logger.getLogger(FileMoveManager.class);
private final File folder;
private final String[] prefixesToPreserve;
private int maxFolders;
public static final String PREFIX = "oldreplica.";
@ -70,9 +71,10 @@ public class FileMoveManager {
this(folder, -1);
}
public FileMoveManager(File folder, int maxFolders) {
public FileMoveManager(File folder, int maxFolders, String... prefixesToPreserve) {
this.folder = folder;
this.maxFolders = maxFolders;
this.prefixesToPreserve = prefixesToPreserve != null ? Arrays.copyOf(prefixesToPreserve, prefixesToPreserve.length) : null;
}
public int getMaxFolders() {
@ -99,8 +101,23 @@ public class FileMoveManager {
ActiveMQServerLogger.LOGGER.backupDeletingData(folder.getPath());
for (String fileMove : files) {
File fileFrom = new File(folder, fileMove);
logger.tracef("deleting %s", fileFrom);
deleteTree(fileFrom);
if (prefixesToPreserve != null) {
boolean skip = false;
for (String prefixToPreserve : prefixesToPreserve) {
if (fileMove.startsWith(prefixToPreserve)) {
logger.tracef("skipping %s", fileFrom);
skip = true;
break;
}
}
if (!skip) {
logger.tracef("deleting %s", fileFrom);
deleteTree(fileFrom);
}
} else {
logger.tracef("deleting %s", fileFrom);
deleteTree(fileFrom);
}
}
} else {
// Since we will create one folder, we are already taking that one into consideration
@ -113,8 +130,26 @@ public class FileMoveManager {
for (String fileMove : files) {
File fileFrom = new File(folder, fileMove);
File fileTo = new File(folderTo, fileMove);
logger.tracef("doMove:: moving %s as %s", fileFrom, fileTo);
Files.move(fileFrom.toPath(), fileTo.toPath());
if (prefixesToPreserve != null) {
boolean copy = false;
for (String prefixToPreserve : prefixesToPreserve) {
if (fileMove.startsWith(prefixToPreserve)) {
logger.tracef("skipping %s", fileFrom);
copy = true;
break;
}
}
if (copy) {
logger.tracef("copying %s to %s", fileFrom, fileTo);
Files.copy(fileFrom.toPath(), fileTo.toPath());
} else {
logger.tracef("doMove:: moving %s as %s", fileFrom, fileTo);
Files.move(fileFrom.toPath(), fileTo.toPath());
}
} else {
logger.tracef("doMove:: moving %s as %s", fileFrom, fileTo);
Files.move(fileFrom.toPath(), fileTo.toPath());
}
}
}
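A hedged usage sketch of the extended FileMoveManager (the import path and directory name are assumptions for illustration): file names starting with a preserved prefix are skipped when old data is deleted and are copied, rather than moved, into the oldreplica folder, which is what lets the node manager lock files survive a backup's data move.
import java.io.File;
import java.io.IOException;
import org.apache.activemq.artemis.core.server.files.FileMoveManager;
final class PreserveLockFilesExample {
   static void moveDataKeepingLocks(File nodeManagerLockDir) throws IOException {
      // keep at most 2 old replica folders; never move away the node manager lock files
      FileMoveManager moveManager = new FileMoveManager(nodeManagerLockDir, 2, "server.lock", "serverlock");
      moveManager.doMove(); // preserved files are copied (and left in place), everything else is moved away
   }
}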

View File

@ -110,4 +110,8 @@ public abstract class Activation implements Runnable {
public ReplicationManager getReplicationManager() {
return null;
}
public boolean isReplicaSync() {
return false;
}
}

View File

@ -109,7 +109,6 @@ import org.apache.activemq.artemis.core.postoffice.impl.LocalQueueBinding;
import org.apache.activemq.artemis.core.postoffice.impl.PostOfficeImpl;
import org.apache.activemq.artemis.core.remoting.server.RemotingService;
import org.apache.activemq.artemis.core.remoting.server.impl.RemotingServiceImpl;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.replication.ReplicationManager;
import org.apache.activemq.artemis.core.security.CheckType;
import org.apache.activemq.artemis.core.security.Role;
@ -797,14 +796,6 @@ public class ActiveMQServerImpl implements ActiveMQServer {
}
}
@Override
public ReplicationEndpoint getReplicationEndpoint() {
if (activation instanceof SharedNothingBackupActivation) {
return ((SharedNothingBackupActivation) activation).getReplicationEndpoint();
}
return null;
}
@Override
public void unlockActivation() {
activationLock.release();
@ -921,7 +912,7 @@ public class ActiveMQServerImpl implements ActiveMQServer {
return threadPool;
}
public void setActivation(SharedNothingLiveActivation activation) {
public void setActivation(Activation activation) {
this.activation = activation;
}
@ -1145,19 +1136,7 @@ public class ActiveMQServerImpl implements ActiveMQServer {
@Override
public boolean isReplicaSync() {
if (activation instanceof SharedNothingLiveActivation) {
ReplicationManager replicationManager = getReplicationManager();
if (replicationManager == null) {
return false;
} else {
return !replicationManager.isSynchronizing();
}
} else if (activation instanceof SharedNothingBackupActivation) {
return ((SharedNothingBackupActivation) activation).isRemoteBackupUpToDate();
} else {
return false;
}
return activation.isReplicaSync();
}
public void stop(boolean failoverOnServerShutdown, final boolean criticalIOError, boolean restarting) {
@ -3116,7 +3095,7 @@ public class ActiveMQServerImpl implements ActiveMQServer {
postOffice = new PostOfficeImpl(this, storageManager, pagingManager, queueFactory, managementService, configuration.getMessageExpiryScanPeriod(), configuration.getAddressQueueScanPeriod(), configuration.getWildcardConfiguration(), configuration.getIDCacheSize(), configuration.isPersistIDCache(), addressSettingsRepository);
// This can't be created until node id is set
clusterManager = new ClusterManager(executorFactory, this, postOffice, scheduledPool, managementService, configuration, nodeManager, haPolicy.isBackup());
clusterManager = new ClusterManager(executorFactory, this, postOffice, scheduledPool, managementService, configuration, nodeManager, haPolicy.useQuorumManager());
federationManager = new FederationManager(this);
@ -4191,10 +4170,16 @@ public class ActiveMQServerImpl implements ActiveMQServer {
* move any older data away and log a warning about it.
*/
void moveServerData(int maxSavedReplicated) throws IOException {
moveServerData(maxSavedReplicated, false);
}
void moveServerData(int maxSavedReplicated, boolean preserveLockFiles) throws IOException {
File[] dataDirs = new File[]{configuration.getBindingsLocation(), configuration.getJournalLocation(), configuration.getPagingLocation(), configuration.getLargeMessagesLocation()};
for (File data : dataDirs) {
FileMoveManager moveManager = new FileMoveManager(data, maxSavedReplicated);
final boolean isLockFolder = preserveLockFiles ? data.equals(configuration.getNodeManagerLockLocation()) : false;
final String[] lockPrefixes = isLockFolder ? new String[]{FileBasedNodeManager.SERVER_LOCK_NAME, "serverlock"} : null;
FileMoveManager moveManager = new FileMoveManager(data, maxSavedReplicated, lockPrefixes);
moveManager.doMove();
}
}

View File

@ -29,7 +29,6 @@ import org.apache.activemq.artemis.api.core.Pair;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.api.core.client.TopologyMember;
import org.apache.activemq.artemis.core.server.LiveNodeLocator;
import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum;
import org.apache.activemq.artemis.utils.ConcurrentUtil;
/**
@ -47,8 +46,9 @@ public class AnyLiveNodeLocatorForReplication extends LiveNodeLocator {
private String nodeID;
public AnyLiveNodeLocatorForReplication(SharedNothingBackupQuorum backupQuorum, ActiveMQServerImpl server, long retryReplicationWait) {
super(backupQuorum);
public AnyLiveNodeLocatorForReplication(BackupRegistrationListener backupRegistrationListener,
ActiveMQServerImpl server, long retryReplicationWait) {
super(backupRegistrationListener);
this.server = server;
this.retryReplicationWait = retryReplicationWait;
}

View File

@ -0,0 +1,160 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.server.impl;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.api.core.ActiveMQExceptionType;
import org.apache.activemq.artemis.api.core.DiscoveryGroupConfiguration;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.api.core.client.ActiveMQClient;
import org.apache.activemq.artemis.api.core.client.ClientSession;
import org.apache.activemq.artemis.api.core.client.ClientSessionFactory;
import org.apache.activemq.artemis.api.core.client.ClusterTopologyListener;
import org.apache.activemq.artemis.api.core.client.ServerLocator;
import org.apache.activemq.artemis.api.core.client.TopologyMember;
import org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal;
import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal;
import org.apache.activemq.artemis.core.config.ClusterConnectionConfiguration;
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.ConfigurationUtils;
import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
import org.jboss.logging.Logger;
/**
* This class contains some utilities that allow a broker to check the presence and role of another broker in the cluster.
*/
final class ClusterTopologySearch {
private ClusterTopologySearch() {
}
/**
* Determines whether a live server with the given nodeID is already running.<br>
* This search doesn't filter out the caller broker's transport and is meant to be used
* when the broker acceptors aren't running yet.
*/
public static boolean searchActiveLiveNodeId(String clusterName,
String nodeId,
long timeout,
TimeUnit unit,
Configuration serverConfiguration) throws ActiveMQException {
if (serverConfiguration.getClusterConfigurations().isEmpty())
return false;
final ClusterConnectionConfiguration clusterConnectionConfiguration = ConfigurationUtils.getReplicationClusterConfiguration(serverConfiguration, clusterName);
final LiveNodeIdListener liveNodeIdListener = new LiveNodeIdListener(nodeId, serverConfiguration.getClusterUser(), serverConfiguration.getClusterPassword());
try (ServerLocatorInternal locator = createLocator(serverConfiguration, clusterConnectionConfiguration)) {
// if you would like to filter out a transport configuration:
// locator.setClusterTransportConfiguration(callerBrokerTransportConfiguration)
locator.addClusterTopologyListener(liveNodeIdListener);
locator.setReconnectAttempts(0);
try (ClientSessionFactoryInternal ignored = locator.connectNoWarnings()) {
return liveNodeIdListener.awaitNodePresent(timeout, unit);
} catch (Exception notConnected) {
if (!(notConnected instanceof ActiveMQException) || ActiveMQExceptionType.INTERNAL_ERROR.equals(((ActiveMQException) notConnected).getType())) {
// report all exceptions that aren't ActiveMQException and all INTERNAL_ERRORs
ActiveMQServerLogger.LOGGER.failedConnectingToCluster(notConnected);
}
return false;
}
}
}
private static final class LiveNodeIdListener implements ClusterTopologyListener {
private static final Logger logger = Logger.getLogger(LiveNodeIdListener.class);
private final String nodeId;
private final String user;
private final String password;
private final CountDownLatch searchCompleted;
private boolean isNodePresent = false;
LiveNodeIdListener(String nodeId, String user, String password) {
this.nodeId = nodeId;
this.user = user;
this.password = password;
this.searchCompleted = new CountDownLatch(1);
}
@Override
public void nodeUP(TopologyMember topologyMember, boolean last) {
boolean isOurNodeId = nodeId != null && nodeId.equals(topologyMember.getNodeId());
if (isOurNodeId && isActive(topologyMember.getLive())) {
isNodePresent = true;
}
if (isOurNodeId || last) {
searchCompleted.countDown();
}
}
public boolean awaitNodePresent(long timeout, TimeUnit unit) throws InterruptedException {
searchCompleted.await(timeout, unit);
return isNodePresent;
}
/**
* In a cluster of replicated live/backup pairs, if a backup crashes and then its live crashes, the cluster will
* retain the topology information of the live, so when the live server restarts it will check the
* cluster, see that its nodeID is present (which it will be) and activate as a backup rather than
* a live. To prevent this, an additional check is necessary to see whether the server with the matching
* nodeID is actually active, which is done by attempting to make a connection to it.
*
* @param transportConfiguration the live's transport configuration taken from the topology
* @return {@code true} if a connection could be established, {@code false} otherwise
*/
private boolean isActive(TransportConfiguration transportConfiguration) {
try (ServerLocator serverLocator = ActiveMQClient.createServerLocator(false, transportConfiguration);
ClientSessionFactory clientSessionFactory = serverLocator.createSessionFactory();
ClientSession clientSession = clientSessionFactory.createSession(user, password, false, false, false, false, 0)) {
return true;
} catch (Exception e) {
logger.debug("isActive check failed", e);
return false;
}
}
@Override
public void nodeDown(long eventUID, String nodeID) {
// no-op
}
}
private static ServerLocatorInternal createLocator(Configuration configuration,
ClusterConnectionConfiguration config) throws ActiveMQException {
final ServerLocatorInternal locator;
if (config.getDiscoveryGroupName() != null) {
DiscoveryGroupConfiguration dg = configuration.getDiscoveryGroupConfigurations().get(config.getDiscoveryGroupName());
if (dg == null) {
throw ActiveMQMessageBundle.BUNDLE.noDiscoveryGroupFound(null);
}
locator = (ServerLocatorInternal) ActiveMQClient.createServerLocatorWithHA(dg);
} else {
TransportConfiguration[] tcConfigs = config.getStaticConnectors() != null ? configuration.getTransportConfigurations(config.getStaticConnectors()) : null;
locator = (ServerLocatorInternal) ActiveMQClient.createServerLocatorWithHA(tcConfigs);
}
return locator;
}
}
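A hedged sketch of how the helper above can be driven during activation (it is package-private, so the caller has to live in org.apache.activemq.artemis.core.server.impl; the cluster name and timeout are illustrative):
package org.apache.activemq.artemis.core.server.impl;
import java.util.concurrent.TimeUnit;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.core.config.Configuration;
final class CheckForActiveLiveExample {
   static boolean liveAlreadyOwnsNodeId(Configuration configuration, String nodeId) throws ActiveMQException {
      // true only if a broker with this nodeId is in the topology AND answers a test connection
      return ClusterTopologySearch.searchActiveLiveNodeId("my-cluster", nodeId, 10, TimeUnit.SECONDS, configuration);
   }
}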

View File

@ -20,6 +20,7 @@ import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
@ -27,19 +28,66 @@ import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.utils.UUID;
import org.apache.activemq.artemis.utils.UUIDGenerator;
import static java.nio.file.StandardOpenOption.CREATE_NEW;
import static java.nio.file.StandardOpenOption.READ;
import static java.nio.file.StandardOpenOption.WRITE;
public abstract class FileBasedNodeManager extends NodeManager {
protected static final byte FIRST_TIME_START = '0';
public static final String SERVER_LOCK_NAME = "server.lock";
public static final String DATA_VERSION_NAME = "server.data.version";
private static final String ACCESS_MODE = "rw";
private final File directory;
protected FileChannel channel;
protected FileChannel dataVersionChannel;
public FileBasedNodeManager(boolean replicatedBackup, File directory) {
super(replicatedBackup);
this.directory = directory;
}
protected void useDataVersionChannel() throws IOException {
if (dataVersionChannel != null) {
return;
}
dataVersionChannel = FileChannel.open(newFile(DATA_VERSION_NAME).toPath(), READ, WRITE, CREATE_NEW);
}
@Override
public long readDataVersion() throws NodeManagerException {
if (!isStarted()) {
throw new NodeManagerException(new IllegalStateException("node manager must be started first"));
}
try {
useDataVersionChannel();
ByteBuffer tmpBuffer = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
if (dataVersionChannel.read(tmpBuffer, 0) != Long.BYTES) {
return 0;
}
tmpBuffer.flip();
return tmpBuffer.getLong(0);
} catch (IOException ie) {
throw new NodeManagerException(ie);
}
}
@Override
public void writeDataVersion(long version) throws NodeManagerException {
if (!isStarted()) {
throw new NodeManagerException(new IllegalStateException("node manager must be started first"));
}
try {
useDataVersionChannel();
ByteBuffer tmpBuffer = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
tmpBuffer.putLong(0, version);
dataVersionChannel.write(tmpBuffer, 0);
dataVersionChannel.force(false);
} catch (IOException ie) {
throw new NodeManagerException(ie);
}
}
/**
* Ensures existence of persistent information about the server's nodeID.
* <p>
@ -137,9 +185,20 @@ public abstract class FileBasedNodeManager extends NodeManager {
@Override
public synchronized void stop() throws Exception {
FileChannel channelCopy = channel;
if (channelCopy != null)
channelCopy.close();
super.stop();
try {
if (channelCopy != null)
channelCopy.close();
} finally {
try {
FileChannel dataVersionChannel = this.dataVersionChannel;
this.dataVersionChannel = null;
if (dataVersionChannel != null) {
dataVersionChannel.close();
}
} finally {
super.stop();
}
}
}
@Override

View File

@ -0,0 +1,127 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.server.impl;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.Queue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.api.core.Pair;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.api.core.client.TopologyMember;
import org.apache.activemq.artemis.core.server.LiveNodeLocator;
import org.apache.activemq.artemis.utils.ConcurrentUtil;
/**
* It looks for a live server in the cluster with a specific NodeID
*/
public class NamedLiveNodeIdLocatorForReplication extends LiveNodeLocator {
private final Lock lock = new ReentrantLock();
private final Condition condition = lock.newCondition();
private final String nodeID;
private final long retryReplicationWait;
private final Queue<Pair<TransportConfiguration, TransportConfiguration>> liveConfigurations = new LinkedList<>();
private final ArrayList<Pair<TransportConfiguration, TransportConfiguration>> triedConfigurations = new ArrayList<>();
private boolean found;
public NamedLiveNodeIdLocatorForReplication(String nodeID,
BackupRegistrationListener backupRegistrationListener,
long retryReplicationWait) {
super(backupRegistrationListener);
this.nodeID = nodeID;
this.retryReplicationWait = retryReplicationWait;
}
@Override
public void locateNode() throws ActiveMQException {
locateNode(-1L);
}
@Override
public void locateNode(long timeout) throws ActiveMQException {
try {
lock.lock();
if (liveConfigurations.size() == 0) {
try {
if (timeout != -1L) {
ConcurrentUtil.await(condition, timeout);
} else {
while (liveConfigurations.size() == 0) {
condition.await(retryReplicationWait, TimeUnit.MILLISECONDS);
liveConfigurations.addAll(triedConfigurations);
triedConfigurations.clear();
}
}
} catch (InterruptedException e) {
//ignore
}
}
} finally {
lock.unlock();
}
}
@Override
public void nodeUP(TopologyMember topologyMember, boolean last) {
try {
lock.lock();
if (nodeID.equals(topologyMember.getNodeId()) && topologyMember.getLive() != null) {
Pair<TransportConfiguration, TransportConfiguration> liveConfiguration = new Pair<>(topologyMember.getLive(), topologyMember.getBackup());
if (!liveConfigurations.contains(liveConfiguration)) {
liveConfigurations.add(liveConfiguration);
}
found = true;
condition.signal();
}
} finally {
lock.unlock();
}
}
@Override
public void nodeDown(long eventUID, String nodeID) {
//no op
}
@Override
public String getNodeID() {
return found ? nodeID : null;
}
@Override
public Pair<TransportConfiguration, TransportConfiguration> getLiveConfiguration() {
return liveConfigurations.peek();
}
@Override
public void notifyRegistrationFailed(boolean alreadyReplicating) {
try {
lock.lock();
triedConfigurations.add(liveConfigurations.poll());
super.notifyRegistrationFailed(alreadyReplicating);
} finally {
lock.unlock();
}
}
}
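A short hedged sketch of how this locator is wired by the fail-back path of the new backup activation (the listener and wait value are illustrative); the real code additionally registers the locator with the ClusterController so that nodeUP events feed it:
import org.apache.activemq.artemis.core.server.LiveNodeLocator;
import org.apache.activemq.artemis.core.server.impl.NamedLiveNodeIdLocatorForReplication;
final class FailbackLocatorExample {
   static LiveNodeLocator locatorForExpectedNodeId(String expectedNodeID) {
      // registration failures are only logged here; ReplicationBackupActivation forwards them
      // to its ReplicationObserver through the RegistrationFailureForwarder instead
      LiveNodeLocator.BackupRegistrationListener listener = alreadyReplicating ->
         System.out.println("backup registration failed, alreadyReplicating=" + alreadyReplicating);
      return new NamedLiveNodeIdLocatorForReplication(expectedNodeID, listener, 2000);
   }
}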

View File

@ -29,7 +29,6 @@ import org.apache.activemq.artemis.api.core.Pair;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.api.core.client.TopologyMember;
import org.apache.activemq.artemis.core.server.LiveNodeLocator;
import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum;
import org.apache.activemq.artemis.utils.ConcurrentUtil;
/**
@ -48,8 +47,10 @@ public class NamedLiveNodeLocatorForReplication extends LiveNodeLocator {
private String nodeID;
public NamedLiveNodeLocatorForReplication(String backupGroupName, SharedNothingBackupQuorum quorumManager, long retryReplicationWait) {
super(quorumManager);
public NamedLiveNodeLocatorForReplication(String backupGroupName,
BackupRegistrationListener backupRegistrationListener,
long retryReplicationWait) {
super(backupRegistrationListener);
this.backupGroupName = backupGroupName;
this.retryReplicationWait = retryReplicationWait;
}

View File

@ -0,0 +1,599 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.server.impl;
import javax.annotation.concurrent.GuardedBy;
import java.util.Objects;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Consumer;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.api.core.ActiveMQIllegalStateException;
import org.apache.activemq.artemis.api.core.Pair;
import org.apache.activemq.artemis.api.core.SimpleString;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.core.protocol.core.Channel;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
import org.apache.activemq.artemis.core.server.LiveNodeLocator;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.core.server.cluster.ClusterControl;
import org.apache.activemq.artemis.core.server.cluster.ClusterController;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.jboss.logging.Logger;
import static org.apache.activemq.artemis.core.server.impl.ReplicationObserver.ReplicationFailure;
/**
* This activation can be used by a primary while trying to fail back, i.e. {@code failback == true}, or
* by a natural-born backup, i.e. {@code failback == false}.<br>
*/
public final class ReplicationBackupActivation extends Activation implements DistributedPrimitiveManager.UnavailableManagerListener {
private static final Logger LOGGER = Logger.getLogger(ReplicationBackupActivation.class);
private final boolean wasLive;
private final ReplicationBackupPolicy policy;
private final ActiveMQServerImpl activeMQServer;
// This field is != null iff this node is a primary during a fail-back, i.e. acting as a backup in order to become live again.
private final String expectedNodeID;
@GuardedBy("this")
private boolean closed;
private final DistributedPrimitiveManager distributedManager;
// Used for monitoring purposes
private volatile ReplicationObserver replicationObserver;
// Used for testing purposes
private volatile ReplicationEndpoint replicationEndpoint;
// Used for testing purposes
private Consumer<ReplicationEndpoint> onReplicationEndpointCreation;
// Used to arbiter one-shot server stop/restart
private final AtomicBoolean stopping;
public ReplicationBackupActivation(final ActiveMQServerImpl activeMQServer,
final boolean wasLive,
final DistributedPrimitiveManager distributedManager,
final ReplicationBackupPolicy policy) {
this.wasLive = wasLive;
this.activeMQServer = activeMQServer;
if (policy.isTryFailback()) {
final SimpleString serverNodeID = activeMQServer.getNodeID();
if (serverNodeID == null || serverNodeID.isEmpty()) {
throw new IllegalStateException("A failback activation must be biased around a specific NodeID");
}
this.expectedNodeID = serverNodeID.toString();
} else {
this.expectedNodeID = null;
}
this.distributedManager = distributedManager;
this.policy = policy;
this.replicationObserver = null;
this.replicationEndpoint = null;
this.stopping = new AtomicBoolean(false);
}
/**
* used for testing purposes.
*/
public DistributedPrimitiveManager getDistributedManager() {
return distributedManager;
}
@Override
public void onUnavailableManagerEvent() {
synchronized (this) {
if (closed) {
return;
}
}
LOGGER.info("Unavailable quorum service detected: try restart server");
asyncRestartServer(activeMQServer, true);
}
/**
* This util class exists because {@link LiveNodeLocator} needs a {@link LiveNodeLocator.BackupRegistrationListener}
* to forward backup registration failure events: this is used to switch on/off backup registration event listening
* on an existing locator.
*/
private static final class RegistrationFailureForwarder implements LiveNodeLocator.BackupRegistrationListener, AutoCloseable {
private static final LiveNodeLocator.BackupRegistrationListener NOOP_LISTENER = ignore -> {
};
private volatile LiveNodeLocator.BackupRegistrationListener listener = NOOP_LISTENER;
public RegistrationFailureForwarder to(LiveNodeLocator.BackupRegistrationListener listener) {
this.listener = listener;
return this;
}
@Override
public void onBackupRegistrationFailed(boolean alreadyReplicating) {
listener.onBackupRegistrationFailed(alreadyReplicating);
}
@Override
public void close() {
listener = NOOP_LISTENER;
}
}
@Override
public void run() {
synchronized (this) {
if (closed) {
return;
}
}
try {
LOGGER.info("Trying to reach majority of quorum service nodes");
distributedManager.start();
LOGGER.info("Quorum service available: starting broker");
distributedManager.addUnavailableManagerListener(this);
// Stop the previous node manager and create a new one with NodeManager::replicatedBackup == true:
// NodeManager::start skips setting up the lock file with the NodeID until NodeManager::stopBackup is called.
activeMQServer.resetNodeManager();
activeMQServer.getNodeManager().stop();
// A primary needs to preserve its NodeID across runs
activeMQServer.moveServerData(policy.getMaxSavedReplicatedJournalsSize(), policy.isTryFailback());
activeMQServer.getNodeManager().start();
if (!activeMQServer.initialisePart1(false)) {
return;
}
synchronized (this) {
if (closed)
return;
}
final ClusterController clusterController = activeMQServer.getClusterManager().getClusterController();
clusterController.awaitConnectionToReplicationCluster();
activeMQServer.getBackupManager().start();
ActiveMQServerLogger.LOGGER.backupServerStarted(activeMQServer.getVersion().getFullVersion(),
activeMQServer.getNodeManager().getNodeId());
activeMQServer.setState(ActiveMQServerImpl.SERVER_STATE.STARTED);
final DistributedLock liveLock = replicateAndFailover(clusterController);
if (liveLock == null) {
return;
}
startAsLive(liveLock);
} catch (Exception e) {
if ((e instanceof InterruptedException || e instanceof IllegalStateException) && !activeMQServer.isStarted()) {
// do not log these errors if the server is being stopped.
return;
}
ActiveMQServerLogger.LOGGER.initializationError(e);
}
}
private void startAsLive(final DistributedLock liveLock) throws Exception {
activeMQServer.setHAPolicy(policy.getLivePolicy());
synchronized (activeMQServer) {
if (!activeMQServer.isStarted()) {
liveLock.close();
return;
}
ActiveMQServerLogger.LOGGER.becomingLive(activeMQServer);
// stopBackup is going to write the NodeID previously set on the NodeManager,
// because activeMQServer.resetNodeManager() has created a NodeManager with replicatedBackup == true.
activeMQServer.getNodeManager().stopBackup();
activeMQServer.getStorageManager().start();
activeMQServer.getBackupManager().activated();
// IMPORTANT:
// we're setting this activation JUST because it would allow the server to use its
// getActivationChannelHandler to handle replication
final ReplicationPrimaryActivation primaryActivation = new ReplicationPrimaryActivation(activeMQServer, distributedManager, policy.getLivePolicy());
liveLock.addListener(primaryActivation);
activeMQServer.setActivation(primaryActivation);
activeMQServer.initialisePart2(false);
// calling primaryActivation.onUnavailableLockEvent when !isHeldByCaller is necessary in case the lock was unavailable
// before liveLock.addListener: just throwing an exception won't stop the broker.
final boolean stillLive;
try {
stillLive = liveLock.isHeldByCaller();
} catch (UnavailableStateException e) {
LOGGER.warn(e);
primaryActivation.onUnavailableLockEvent();
throw new ActiveMQIllegalStateException("This server cannot check its role as a live: activation is failed");
}
if (!stillLive) {
primaryActivation.onUnavailableLockEvent();
throw new ActiveMQIllegalStateException("This server is not live anymore: activation is failed");
}
if (activeMQServer.getIdentity() != null) {
ActiveMQServerLogger.LOGGER.serverIsLive(activeMQServer.getIdentity());
} else {
ActiveMQServerLogger.LOGGER.serverIsLive();
}
activeMQServer.completeActivation(true);
}
}
private LiveNodeLocator createLiveNodeLocator(final LiveNodeLocator.BackupRegistrationListener registrationListener) {
if (expectedNodeID != null) {
assert policy.isTryFailback();
return new NamedLiveNodeIdLocatorForReplication(expectedNodeID, registrationListener, policy.getRetryReplicationWait());
}
return policy.getGroupName() == null ?
new AnyLiveNodeLocatorForReplication(registrationListener, activeMQServer, policy.getRetryReplicationWait()) :
new NamedLiveNodeLocatorForReplication(policy.getGroupName(), registrationListener, policy.getRetryReplicationWait());
}
private DistributedLock replicateAndFailover(final ClusterController clusterController) throws ActiveMQException, InterruptedException {
final RegistrationFailureForwarder registrationFailureForwarder = new RegistrationFailureForwarder();
// the node locator isn't stateless and contains a live list of candidate nodes to connect to, hence
// it MUST be reused for each replicateLive attempt
final LiveNodeLocator nodeLocator = createLiveNodeLocator(registrationFailureForwarder);
clusterController.addClusterTopologyListenerForReplication(nodeLocator);
try {
while (true) {
synchronized (this) {
if (closed) {
return null;
}
}
final ReplicationFailure failure = replicateLive(clusterController, nodeLocator, registrationFailureForwarder);
if (failure == null) {
Thread.sleep(clusterController.getRetryIntervalForReplicatedCluster());
continue;
}
if (!activeMQServer.isStarted()) {
return null;
}
LOGGER.debugf("ReplicationFailure = %s", failure);
boolean voluntaryFailOver = false;
switch (failure) {
case VoluntaryFailOver:
voluntaryFailOver = true;
case NonVoluntaryFailover:
final DistributedLock liveLock = tryAcquireLiveLock();
// from now on we're meant to stop:
// - due to failover
// - due to restart/stop
assert stopping.get();
if (liveLock != null) {
return liveLock;
}
boolean restart = true;
if (voluntaryFailOver && isFirstFailbackAttempt()) {
restart = false;
LOGGER.error("Failed to fail-back: stopping broker based on quorum results");
} else {
ActiveMQServerLogger.LOGGER.restartingAsBackupBasedOnQuorumVoteResults();
}
// let's ignore the stopping flag here, we're in control of it
asyncRestartServer(activeMQServer, restart, false);
return null;
case RegistrationError:
LOGGER.error("Stopping broker because of critical registration error");
asyncRestartServer(activeMQServer, false);
return null;
case AlreadyReplicating:
// can just retry here, data should be clean and nodeLocator
// should remove the live node that has answered this
LOGGER.info("Live broker was already replicating: retry sync with another live");
continue;
case ClosedObserver:
return null;
case BackupNotInSync:
LOGGER.info("Replication failure while initial sync not yet completed: restart as backup");
asyncRestartServer(activeMQServer, true);
return null;
case WrongNodeId:
LOGGER.error("Stopping broker because of wrong node ID communication from live: maybe a misbehaving live?");
asyncRestartServer(activeMQServer, false);
return null;
default:
throw new AssertionError("Unsupported failure " + failure);
}
}
} finally {
silentExecution("Errored on cluster topology listener for replication cleanup", () -> clusterController.removeClusterTopologyListenerForReplication(nodeLocator));
}
}
/**
* {@code wasLive} is {@code true} only while transitioning from primary to backup.<br>
* If a natural-born backup becomes live and allows failback, {@code wasLive} is still {@code false}
* while transitioning back to backup.<br>
* The check on {@link ReplicationBackupPolicy#isTryFailback()} is redundant but still useful for correctness.
* <p>
* In case of fail-back, any event that's going to restart this broker as backup (e.g. quorum service unavailable
* or some replication failures) will cause {@code wasLive} to be {@code false}, because the HA policy that is set
* is no longer a primary one.
*/
private boolean isFirstFailbackAttempt() {
return wasLive && policy.isTryFailback();
}
private DistributedLock tryAcquireLiveLock() throws InterruptedException {
// disable quorum service unavailability handling and just treat this imperatively
if (!stopping.compareAndSet(false, true)) {
// already unavailable quorum service: fail fast
return null;
}
distributedManager.removeUnavailableManagerListener(this);
assert activeMQServer.getNodeManager().getNodeId() != null;
final String liveID = activeMQServer.getNodeManager().getNodeId().toString();
final int voteRetries = policy.getVoteRetries();
final long maxAttempts = voteRetries >= 0 ? (voteRetries + 1) : -1;
if (maxAttempts == -1) {
LOGGER.error("It's not safe to retry an infinite amount of time to acquire a live lock: please consider setting a vote-retries value");
}
final long voteRetryWait = policy.getVoteRetryWait();
final DistributedLock liveLock = getLock(distributedManager, liveID);
if (liveLock == null) {
return null;
}
for (long attempt = 0; maxAttempts >= 0 ? (attempt < maxAttempts) : true; attempt++) {
try {
if (liveLock.tryLock(voteRetryWait, TimeUnit.MILLISECONDS)) {
LOGGER.debugf("%s live lock acquired after %d attempts.", liveID, (attempt + 1));
return liveLock;
}
} catch (UnavailableStateException e) {
LOGGER.warnf(e, "Failed to acquire live lock %s because of unavailable quorum service: stop trying", liveID);
distributedManager.stop();
return null;
}
}
LOGGER.warnf("Failed to acquire live lock %s after %d tries", liveID, maxAttempts);
distributedManager.stop();
return null;
}
private DistributedLock getLock(final DistributedPrimitiveManager manager,
final String lockId) throws InterruptedException {
if (!manager.isStarted()) {
return null;
}
try {
return manager.getDistributedLock(lockId);
} catch (ExecutionException e) {
LOGGER.warnf(e, "Errored while getting lock %s", lockId);
return null;
} catch (TimeoutException te) {
LOGGER.warnf(te, "Timeout while getting lock %s", lockId);
return null;
}
}
private ReplicationObserver replicationObserver() {
if (policy.isTryFailback()) {
return ReplicationObserver.failbackObserver(activeMQServer.getNodeManager(), activeMQServer.getBackupManager(), activeMQServer.getScheduledPool(), expectedNodeID);
}
return ReplicationObserver.failoverObserver(activeMQServer.getNodeManager(), activeMQServer.getBackupManager(), activeMQServer.getScheduledPool());
}
private ReplicationFailure replicateLive(final ClusterController clusterController,
final LiveNodeLocator liveLocator,
final RegistrationFailureForwarder registrationFailureForwarder) throws ActiveMQException {
try (ReplicationObserver replicationObserver = replicationObserver();
RegistrationFailureForwarder ignored = registrationFailureForwarder.to(replicationObserver)) {
this.replicationObserver = replicationObserver;
clusterController.addClusterTopologyListener(replicationObserver);
// ReplicationError notifies backup registration failures to live locator -> forwarder -> observer
final ReplicationError replicationError = new ReplicationError(liveLocator);
clusterController.addIncomingInterceptorForReplication(replicationError);
try {
final ClusterControl liveControl = tryLocateAndConnectToLive(liveLocator, clusterController);
if (liveControl == null) {
return null;
}
try {
final ReplicationEndpoint replicationEndpoint = tryAuthorizeAndAsyncRegisterAsBackupToLive(liveControl, replicationObserver);
if (replicationEndpoint == null) {
return ReplicationFailure.RegistrationError;
}
this.replicationEndpoint = replicationEndpoint;
assert replicationEndpoint != null;
try {
return replicationObserver.awaitReplicationFailure();
} finally {
this.replicationEndpoint = null;
ActiveMQServerImpl.stopComponent(replicationEndpoint);
closeChannelOf(replicationEndpoint);
}
} finally {
silentExecution("Errored on live control close", liveControl::close);
}
} finally {
silentExecution("Errored on cluster topology listener cleanup", () -> clusterController.removeClusterTopologyListener(replicationObserver));
silentExecution("Errored while removing incoming interceptor for replication", () -> clusterController.removeIncomingInterceptorForReplication(replicationError));
}
} finally {
this.replicationObserver = null;
}
}
private static void silentExecution(String debugErrorMessage, Runnable task) {
try {
task.run();
} catch (Throwable ignore) {
LOGGER.debug(debugErrorMessage, ignore);
}
}
private static void closeChannelOf(final ReplicationEndpoint replicationEndpoint) {
if (replicationEndpoint == null) {
return;
}
if (replicationEndpoint.getChannel() != null) {
silentExecution("Errored while closing replication endpoint channel", () -> replicationEndpoint.getChannel().close());
replicationEndpoint.setChannel(null);
}
}
private boolean asyncRestartServer(final ActiveMQServer server, boolean restart) {
return asyncRestartServer(server, restart, true);
}
private boolean asyncRestartServer(final ActiveMQServer server, boolean restart, boolean checkStopping) {
if (checkStopping) {
if (!stopping.compareAndSet(false, true)) {
return false;
}
}
new Thread(() -> {
if (server.getState() != ActiveMQServer.SERVER_STATE.STOPPED && server.getState() != ActiveMQServer.SERVER_STATE.STOPPING) {
try {
server.stop(!restart);
if (restart) {
server.start();
}
} catch (Exception e) {
if (restart) {
ActiveMQServerLogger.LOGGER.errorRestartingBackupServer(e, server);
} else {
ActiveMQServerLogger.LOGGER.errorStoppingServer(e);
}
}
}
}).start();
return true;
}
private ClusterControl tryLocateAndConnectToLive(final LiveNodeLocator liveLocator,
final ClusterController clusterController) throws ActiveMQException {
liveLocator.locateNode();
final Pair<TransportConfiguration, TransportConfiguration> possibleLive = liveLocator.getLiveConfiguration();
final String nodeID = liveLocator.getNodeID();
if (nodeID == null) {
throw new RuntimeException("Could not establish the connection with any live");
}
if (!policy.isTryFailback()) {
assert expectedNodeID == null;
activeMQServer.getNodeManager().setNodeID(nodeID);
} else {
assert expectedNodeID.equals(nodeID);
}
if (possibleLive == null) {
return null;
}
final ClusterControl liveControl = tryConnectToNodeInReplicatedCluster(clusterController, possibleLive.getA());
if (liveControl != null) {
return liveControl;
}
return tryConnectToNodeInReplicatedCluster(clusterController, possibleLive.getB());
}
private static ClusterControl tryConnectToNodeInReplicatedCluster(final ClusterController clusterController,
final TransportConfiguration tc) {
try {
if (tc != null) {
return clusterController.connectToNodeInReplicatedCluster(tc);
}
} catch (Exception e) {
LOGGER.debug(e.getMessage(), e);
}
return null;
}
@Override
public void close(final boolean permanently, final boolean restarting) throws Exception {
synchronized (this) {
closed = true;
final ReplicationObserver replicationObserver = this.replicationObserver;
if (replicationObserver != null) {
replicationObserver.close();
}
}
//we have to check as the server policy may have changed
try {
if (activeMQServer.getHAPolicy().isBackup()) {
// To avoid an NPE caused by the stop
final NodeManager nodeManager = activeMQServer.getNodeManager();
activeMQServer.interruptActivationThread(nodeManager);
if (nodeManager != null) {
nodeManager.stopBackup();
}
}
} finally {
// this one needs to happen after interrupting the activation thread
// in order to unblock distributedManager::start
distributedManager.stop();
}
}
@Override
public void preStorageClose() throws Exception {
// TODO replication endpoint close?
}
private ReplicationEndpoint tryAuthorizeAndAsyncRegisterAsBackupToLive(final ClusterControl liveControl,
final ReplicationObserver liveObserver) {
ReplicationEndpoint replicationEndpoint = null;
try {
liveControl.getSessionFactory().setReconnectAttempts(1);
liveObserver.listenConnectionFailuresOf(liveControl.getSessionFactory());
liveControl.authorize();
replicationEndpoint = new ReplicationEndpoint(activeMQServer, policy.isTryFailback(), liveObserver);
final Consumer<ReplicationEndpoint> onReplicationEndpointCreation = this.onReplicationEndpointCreation;
if (onReplicationEndpointCreation != null) {
onReplicationEndpointCreation.accept(replicationEndpoint);
}
replicationEndpoint.setExecutor(activeMQServer.getExecutorFactory().getExecutor());
connectToReplicationEndpoint(liveControl, replicationEndpoint);
replicationEndpoint.start();
liveControl.announceReplicatingBackupToLive(policy.isTryFailback(), policy.getClusterName());
return replicationEndpoint;
} catch (Exception e) {
ActiveMQServerLogger.LOGGER.replicationStartProblem(e);
ActiveMQServerImpl.stopComponent(replicationEndpoint);
closeChannelOf(replicationEndpoint);
return null;
}
}
private static boolean connectToReplicationEndpoint(final ClusterControl liveControl,
final ReplicationEndpoint replicationEndpoint) {
final Channel replicationChannel = liveControl.createReplicationChannel();
replicationChannel.setHandler(replicationEndpoint);
replicationEndpoint.setChannel(replicationChannel);
return true;
}
@Override
public boolean isReplicaSync() {
// NOTE: this method is just for monitoring purposes, not suitable to perform logic!
// During a failover this backup won't have any active liveObserver and will report `false`!!
final ReplicationObserver liveObserver = this.replicationObserver;
if (liveObserver == null) {
return false;
}
return liveObserver.isBackupUpToDate();
}
public ReplicationEndpoint getReplicationEndpoint() {
return replicationEndpoint;
}
/**
* This must be used just for testing purposes.
*/
public void spyReplicationEndpointCreation(Consumer<ReplicationEndpoint> onReplicationEndpointCreation) {
Objects.requireNonNull(onReplicationEndpointCreation);
this.onReplicationEndpointCreation = onReplicationEndpointCreation;
}
}
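A condensed, hedged sketch of the quorum interaction performed by tryAcquireLiveLock above: instantiate the pluggable manager from its configuration, start it (which waits for the quorum service), fetch the lock keyed by the live's node id and make one bounded acquisition attempt. Error handling and the vote-retries loop are deliberately omitted; the 2-second wait and class name are illustrative.
import java.util.concurrent.TimeUnit;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
final class LiveLockSketch {
   static boolean tryBecomeLive(DistributedPrimitiveManagerConfiguration managerConfiguration,
                                String nodeId) throws Exception {
      DistributedPrimitiveManager manager = DistributedPrimitiveManager.newInstanceOf(
         managerConfiguration.getClassName(), managerConfiguration.getProperties());
      manager.start();                                      // waits until the quorum service is available
      DistributedLock liveLock = manager.getDistributedLock(nodeId);
      boolean acquired = liveLock.tryLock(2_000, TimeUnit.MILLISECONDS);
      if (!acquired) {
         manager.stop();                                    // give up: stay (or restart) as a backup
      }
      return acquired;
   }
}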

View File

@ -0,0 +1,332 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.server.impl;
import javax.annotation.concurrent.GuardedBy;
import java.util.Objects;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.api.core.client.ClusterTopologyListener;
import org.apache.activemq.artemis.api.core.client.SessionFailureListener;
import org.apache.activemq.artemis.api.core.client.TopologyMember;
import org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal;
import org.apache.activemq.artemis.core.protocol.core.CoreRemotingConnection;
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.server.LiveNodeLocator.BackupRegistrationListener;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.core.server.cluster.BackupManager;
import org.jboss.logging.Logger;
final class ReplicationObserver implements ClusterTopologyListener, SessionFailureListener, BackupRegistrationListener, ReplicationEndpoint.ReplicationEndpointEventListener, AutoCloseable {
private static final Logger LOGGER = Logger.getLogger(ReplicationObserver.class);
public enum ReplicationFailure {
VoluntaryFailOver, BackupNotInSync, NonVoluntaryFailover, RegistrationError, AlreadyReplicating, ClosedObserver, WrongNodeId;
}
private final NodeManager nodeManager;
private final BackupManager backupManager;
private final ScheduledExecutorService scheduledPool;
private final boolean failback;
private final String expectedNodeID;
private final CompletableFuture<ReplicationFailure> replicationFailure;
@GuardedBy("this")
private ClientSessionFactoryInternal sessionFactory;
@GuardedBy("this")
private CoreRemotingConnection connection;
@GuardedBy("this")
private ScheduledFuture<?> forcedFailover;
private volatile String liveID;
private volatile boolean backupUpToDate;
private volatile boolean closed;
/**
* This is a safety net in case the live sends the first {@link ReplicationLiveIsStoppingMessage}
* with code {@link org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage.LiveStopping#STOP_CALLED} and crashes before sending the second with
* {@link org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage.LiveStopping#FAIL_OVER}.
* <p>
* If the second message does not arrive within this deadline, we fail over anyway.
*/
public static final int WAIT_TIME_AFTER_FIRST_LIVE_STOPPING_MSG = 60;
private ReplicationObserver(final NodeManager nodeManager,
final BackupManager backupManager,
final ScheduledExecutorService scheduledPool,
final boolean failback,
final String expectedNodeID) {
this.nodeManager = nodeManager;
this.backupManager = backupManager;
this.scheduledPool = scheduledPool;
this.failback = failback;
this.expectedNodeID = expectedNodeID;
this.replicationFailure = new CompletableFuture<>();
this.sessionFactory = null;
this.connection = null;
this.forcedFailover = null;
this.liveID = null;
this.backupUpToDate = false;
this.closed = false;
}
public static ReplicationObserver failbackObserver(final NodeManager nodeManager,
final BackupManager backupManager,
final ScheduledExecutorService scheduledPool,
final String expectedNodeID) {
Objects.requireNonNull(expectedNodeID);
return new ReplicationObserver(nodeManager, backupManager, scheduledPool, true, expectedNodeID);
}
public static ReplicationObserver failoverObserver(final NodeManager nodeManager,
final BackupManager backupManager,
final ScheduledExecutorService scheduledPool) {
return new ReplicationObserver(nodeManager, backupManager, scheduledPool, false, null);
}
private void onLiveDown(boolean voluntaryFailover) {
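// lock-free fast path: the same condition is re-checked under the monitor below to avoid racing with close()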
if (closed || replicationFailure.isDone()) {
return;
}
synchronized (this) {
if (closed || replicationFailure.isDone()) {
return;
}
stopForcedFailoverAfterDelay();
unlistenConnectionFailures();
if (!isRemoteBackupUpToDate()) {
replicationFailure.complete(ReplicationFailure.BackupNotInSync);
} else if (voluntaryFailover) {
replicationFailure.complete(ReplicationFailure.VoluntaryFailOver);
} else {
replicationFailure.complete(ReplicationFailure.NonVoluntaryFailover);
}
}
}
@Override
public void nodeDown(long eventUID, String nodeID) {
// ignore it during a failback:
// a failing slave closes all connections but the one used for replication,
// triggering a nodeDown before the restarted master receives a STOP_CALLED from it.
// This can make the master fire a useless quorum vote during a normal failback.
if (failback) {
return;
}
if (nodeID.equals(liveID)) {
onLiveDown(false);
}
}
@Override
public void nodeUP(TopologyMember member, boolean last) {
}
/**
* If the connection to our replicated live goes down, decide on an action.
*/
@Override
public void connectionFailed(ActiveMQException exception, boolean failedOver) {
onLiveDown(false);
}
@Override
public void connectionFailed(final ActiveMQException me, boolean failedOver, String scaleDownTargetNodeID) {
connectionFailed(me, failedOver);
}
@Override
public void beforeReconnect(ActiveMQException exception) {
//noop
}
@Override
public void close() {
if (closed) {
return;
}
synchronized (this) {
if (closed) {
return;
}
unlistenConnectionFailures();
closed = true;
replicationFailure.complete(ReplicationFailure.ClosedObserver);
}
}
/**
* @param liveSessionFactory the session factory used to connect to the live server
*/
public synchronized void listenConnectionFailuresOf(final ClientSessionFactoryInternal liveSessionFactory) {
if (closed) {
throw new IllegalStateException("the observer is closed: cannot listen to any failures");
}
if (sessionFactory != null || connection != null) {
throw new IllegalStateException("this observer is already listening to other session factory failures");
}
this.sessionFactory = liveSessionFactory;
//belts and braces: there are circumstances where the connection listener doesn't get called but the session one does.
this.sessionFactory.addFailureListener(this);
connection = (CoreRemotingConnection) liveSessionFactory.getConnection();
connection.addFailureListener(this);
}
public synchronized void unlistenConnectionFailures() {
if (connection != null) {
connection.removeFailureListener(this);
connection = null;
}
if (sessionFactory != null) {
sessionFactory.removeFailureListener(this);
sessionFactory = null;
}
}
@Override
public void onBackupRegistrationFailed(boolean alreadyReplicating) {
if (closed || replicationFailure.isDone()) {
return;
}
synchronized (this) {
if (closed || replicationFailure.isDone()) {
return;
}
stopForcedFailoverAfterDelay();
unlistenConnectionFailures();
replicationFailure.complete(alreadyReplicating ? ReplicationFailure.AlreadyReplicating : ReplicationFailure.RegistrationError);
}
}
public ReplicationFailure awaitReplicationFailure() {
try {
return replicationFailure.get();
} catch (Throwable e) {
return ReplicationFailure.ClosedObserver;
}
}
private synchronized void scheduleForcedFailoverAfterDelay() {
if (forcedFailover != null) {
return;
}
forcedFailover = scheduledPool.schedule(() -> onLiveDown(false), WAIT_TIME_AFTER_FIRST_LIVE_STOPPING_MSG, TimeUnit.SECONDS);
}
private synchronized void stopForcedFailoverAfterDelay() {
if (forcedFailover == null) {
return;
}
forcedFailover.cancel(false);
forcedFailover = null;
}
@Override
public void onRemoteBackupUpToDate() {
if (backupUpToDate || closed || replicationFailure.isDone()) {
return;
}
synchronized (this) {
if (backupUpToDate || closed || replicationFailure.isDone()) {
return;
}
assert liveID != null;
backupManager.announceBackup();
backupUpToDate = true;
}
}
public boolean isBackupUpToDate() {
return backupUpToDate;
}
public String getLiveID() {
return liveID;
}
private boolean validateNodeId(String nodeID) {
if (nodeID == null) {
return false;
}
final String existingNodeId = this.liveID;
if (existingNodeId == null) {
if (!failback) {
return true;
}
return nodeID.equals(expectedNodeID);
}
return existingNodeId.equals(nodeID);
}
@Override
public void onLiveNodeId(String nodeId) {
if (closed || replicationFailure.isDone()) {
return;
}
final String existingNodeId = this.liveID;
if (existingNodeId != null && existingNodeId.equals(nodeId)) {
return;
}
synchronized (this) {
if (closed || replicationFailure.isDone()) {
return;
}
if (!validateNodeId(nodeId)) {
stopForcedFailoverAfterDelay();
unlistenConnectionFailures();
replicationFailure.complete(ReplicationFailure.WrongNodeId);
} else if (liveID == null) {
liveID = nodeId;
nodeManager.setNodeID(nodeId);
}
}
}
public boolean isRemoteBackupUpToDate() {
return backupUpToDate;
}
@Override
public void onLiveStopping(ReplicationLiveIsStoppingMessage.LiveStopping finalMessage) {
if (closed || replicationFailure.isDone()) {
return;
}
synchronized (this) {
if (closed || replicationFailure.isDone()) {
return;
}
switch (finalMessage) {
case STOP_CALLED:
scheduleForcedFailoverAfterDelay();
break;
case FAIL_OVER:
onLiveDown(true);
break;
default:
LOGGER.errorf("unsupported LiveStopping type: %s", finalMessage);
}
}
}
}

View File

@ -0,0 +1,439 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.core.server.impl;
import javax.annotation.concurrent.GuardedBy;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import org.apache.activemq.artemis.api.core.ActiveMQAlreadyReplicatingException;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.api.core.ActiveMQIllegalStateException;
import org.apache.activemq.artemis.api.core.Pair;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.core.protocol.core.Channel;
import org.apache.activemq.artemis.core.protocol.core.ChannelHandler;
import org.apache.activemq.artemis.core.protocol.core.CoreRemotingConnection;
import org.apache.activemq.artemis.core.protocol.core.impl.PacketImpl;
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.BackupRegistrationMessage;
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.BackupReplicationStartFailedMessage;
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage;
import org.apache.activemq.artemis.core.remoting.CloseListener;
import org.apache.activemq.artemis.core.remoting.FailureListener;
import org.apache.activemq.artemis.core.remoting.server.RemotingService;
import org.apache.activemq.artemis.core.replication.ReplicationManager;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.core.server.cluster.ClusterConnection;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.apache.activemq.artemis.spi.core.remoting.Acceptor;
import org.jboss.logging.Logger;
import static org.apache.activemq.artemis.core.server.impl.ClusterTopologySearch.searchActiveLiveNodeId;
/**
 * This activation is {@link #run()} only by a natural-born primary, at its first start.
 * During a failover or a failback {@link #run()} isn't used: only {@link #getActivationChannelHandler(Channel, Acceptor)} is.
*/
public class ReplicationPrimaryActivation extends LiveActivation implements DistributedLock.UnavailableLockListener {
private static final Logger LOGGER = Logger.getLogger(ReplicationPrimaryActivation.class);
private static final long DISTRIBUTED_MANAGER_START_TIMEOUT_MILLIS = 20_000;
private static final long BLOCKING_CALLS_TIMEOUT_MILLIS = 5_000;
private final ReplicationPrimaryPolicy policy;
private final ActiveMQServerImpl activeMQServer;
@GuardedBy("replicationLock")
private ReplicationManager replicationManager;
private final Object replicationLock;
private final DistributedPrimitiveManager distributedManager;
private volatile boolean stoppingServer;
public ReplicationPrimaryActivation(final ActiveMQServerImpl activeMQServer,
final DistributedPrimitiveManager distributedManager,
final ReplicationPrimaryPolicy policy) {
this.activeMQServer = activeMQServer;
this.policy = policy;
this.replicationLock = new Object();
this.distributedManager = distributedManager;
}
/**
* used for testing purposes.
*/
public DistributedPrimitiveManager getDistributedManager() {
return distributedManager;
}
@Override
public void freezeConnections(RemotingService remotingService) {
final ReplicationManager replicationManager = getReplicationManager();
if (remotingService != null && replicationManager != null) {
remotingService.freeze(null, replicationManager.getBackupTransportConnection());
} else if (remotingService != null) {
remotingService.freeze(null, null);
}
}
@Override
public void run() {
try {
final NodeManager nodeManager = activeMQServer.getNodeManager();
final String nodeId = nodeManager.readNodeId().toString();
final long dataVersion = nodeManager.readDataVersion();
final DistributedLock liveLock = searchLiveOrAcquireLiveLock(nodeId, BLOCKING_CALLS_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS);
if (liveLock == null) {
return;
}
activeMQServer.initialisePart1(false);
activeMQServer.initialisePart2(false);
// must be registered before checking the caller
liveLock.addListener(this);
// This check is placed here because initialisePart2 is going to load the journal, which
// could pause the JVM long enough to lose lock ownership
if (!liveLock.isHeldByCaller()) {
throw new IllegalStateException("This broker isn't live anymore, probably due to application pauses eg GC, OS etc: failing now");
}
activeMQServer.completeActivation(true);
if (activeMQServer.getIdentity() != null) {
ActiveMQServerLogger.LOGGER.serverIsLive(activeMQServer.getIdentity());
} else {
ActiveMQServerLogger.LOGGER.serverIsLive();
}
} catch (Exception e) {
// async stop it, we don't need to await this to complete
distributedManager.stop();
ActiveMQServerLogger.LOGGER.initializationError(e);
activeMQServer.callActivationFailureListeners(e);
}
}
private DistributedLock searchLiveOrAcquireLiveLock(final String nodeId,
final long blockingCallTimeout,
final TimeUnit unit) throws ActiveMQException, InterruptedException {
if (policy.isCheckForLiveServer()) {
LOGGER.infof("Searching a live server with NodeID = %s", nodeId);
if (searchActiveLiveNodeId(policy.getClusterName(), nodeId, blockingCallTimeout, unit, activeMQServer.getConfiguration())) {
LOGGER.infof("Found a live server with NodeID = %s: restarting as backup", nodeId);
activeMQServer.setHAPolicy(policy.getBackupPolicy());
return null;
}
}
startDistributedPrimitiveManager();
return acquireDistributeLock(getDistributeLock(nodeId), blockingCallTimeout, unit);
}
private void startDistributedPrimitiveManager() throws InterruptedException, ActiveMQException {
LOGGER.infof("Trying to reach the majority of quorum nodes in %d ms.", DISTRIBUTED_MANAGER_START_TIMEOUT_MILLIS);
try {
if (distributedManager.start(DISTRIBUTED_MANAGER_START_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS)) {
return;
}
} catch (InterruptedException ie) {
throw ie;
} catch (Throwable t) {
LOGGER.debug(t);
}
assert !distributedManager.isStarted();
throw new ActiveMQException("Cannot reach the majority of quorum nodes");
}
private DistributedLock getDistributeLock(final String nodeId) throws InterruptedException, ActiveMQException {
try {
return distributedManager.getDistributedLock(nodeId);
} catch (Throwable t) {
try {
distributedManager.stop();
} catch (Throwable ignore) {
// don't care
}
if (t instanceof InterruptedException) {
throw (InterruptedException) t;
}
throw new ActiveMQException("Cannot obtain a live lock instance");
}
}
private DistributedLock acquireDistributeLock(final DistributedLock liveLock,
final long acquireLockTimeout,
final TimeUnit unit) throws InterruptedException, ActiveMQException {
try {
if (liveLock.tryLock(acquireLockTimeout, unit)) {
return liveLock;
}
} catch (UnavailableStateException e) {
LOGGER.debug(e);
}
try {
distributedManager.stop();
} catch (Throwable ignore) {
// don't care
}
throw new ActiveMQException("Failed to become live");
}
@Override
public ChannelHandler getActivationChannelHandler(final Channel channel, final Acceptor acceptorUsed) {
if (stoppingServer) {
return null;
}
return packet -> {
if (packet.getType() == PacketImpl.BACKUP_REGISTRATION) {
onBackupRegistration(channel, acceptorUsed, (BackupRegistrationMessage) packet);
}
};
}
private void onBackupRegistration(final Channel channel,
final Acceptor acceptorUsed,
final BackupRegistrationMessage msg) {
try {
startAsyncReplication(channel.getConnection(), acceptorUsed.getClusterConnection(), msg.getConnector(), msg.isFailBackRequest());
} catch (ActiveMQAlreadyReplicatingException are) {
channel.send(new BackupReplicationStartFailedMessage(BackupReplicationStartFailedMessage.BackupRegistrationProblem.ALREADY_REPLICATING));
} catch (ActiveMQException e) {
LOGGER.debug("Failed to process backup registration packet", e);
channel.send(new BackupReplicationStartFailedMessage(BackupReplicationStartFailedMessage.BackupRegistrationProblem.EXCEPTION));
}
}
private void startAsyncReplication(final CoreRemotingConnection remotingConnection,
final ClusterConnection clusterConnection,
final TransportConfiguration backupTransport,
final boolean isFailBackRequest) throws ActiveMQException {
synchronized (replicationLock) {
if (replicationManager != null) {
throw new ActiveMQAlreadyReplicatingException();
}
if (!activeMQServer.isStarted()) {
throw new ActiveMQIllegalStateException();
}
final ReplicationFailureListener listener = new ReplicationFailureListener();
remotingConnection.addCloseListener(listener);
remotingConnection.addFailureListener(listener);
final ReplicationManager replicationManager = new ReplicationManager(activeMQServer, remotingConnection, clusterConnection.getCallTimeout(), policy.getInitialReplicationSyncTimeout(), activeMQServer.getIOExecutorFactory());
this.replicationManager = replicationManager;
replicationManager.start();
final Thread replicatingThread = new Thread(() -> replicate(replicationManager, clusterConnection, isFailBackRequest, backupTransport));
replicatingThread.setName("async-replication-thread");
replicatingThread.start();
}
}
private void replicate(final ReplicationManager replicationManager,
final ClusterConnection clusterConnection,
final boolean isFailBackRequest,
final TransportConfiguration backupTransport) {
try {
final String nodeID = activeMQServer.getNodeID().toString();
activeMQServer.getStorageManager().startReplication(replicationManager, activeMQServer.getPagingManager(), nodeID, isFailBackRequest && policy.isAllowAutoFailBack(), policy.getInitialReplicationSyncTimeout());
clusterConnection.nodeAnnounced(System.currentTimeMillis(), nodeID, policy.getGroupName(), policy.getScaleDownGroupName(), new Pair<>(null, backupTransport), true);
if (isFailBackRequest && policy.isAllowAutoFailBack()) {
awaitBackupAnnouncementOnFailbackRequest(clusterConnection);
}
} catch (Exception e) {
if (activeMQServer.getState() == ActiveMQServerImpl.SERVER_STATE.STARTED) {
/*
* The reasoning here is that the exception was either caused by (1) the
* (interaction with) the backup, or (2) by an IO Error at the storage. If (1), we
* can swallow the exception and ignore the replication request. If (2) the live
* will crash shortly.
*/
ActiveMQServerLogger.LOGGER.errorStartingReplication(e);
}
try {
ActiveMQServerImpl.stopComponent(replicationManager);
} catch (Exception amqe) {
ActiveMQServerLogger.LOGGER.errorStoppingReplication(amqe);
} finally {
synchronized (replicationLock) {
this.replicationManager = null;
}
}
}
}
/**
 * Handles awaiting the backup announcement before completing a failback.
 * This broker is a backup broker, currently acting as live and ready to restart as a backup
*/
private void awaitBackupAnnouncementOnFailbackRequest(ClusterConnection clusterConnection) throws Exception {
final String nodeID = activeMQServer.getNodeID().toString();
final BackupTopologyListener topologyListener = new BackupTopologyListener(nodeID, clusterConnection.getConnector());
clusterConnection.addClusterTopologyListener(topologyListener);
try {
if (topologyListener.waitForBackup()) {
restartAsBackupAfterFailback();
} else {
ActiveMQServerLogger.LOGGER.failbackMissedBackupAnnouncement();
}
} finally {
clusterConnection.removeClusterTopologyListener(topologyListener);
}
}
/**
* If {@link #asyncStopServer()} happens before this call, the restart just won't happen.
 * If {@link #asyncStopServer()} happens after this call, it will make the server stop right after being restarted.
*/
private void restartAsBackupAfterFailback() throws Exception {
if (stoppingServer) {
return;
}
synchronized (this) {
if (stoppingServer) {
return;
}
distributedManager.stop();
activeMQServer.fail(true);
ActiveMQServerLogger.LOGGER.restartingReplicatedBackupAfterFailback();
activeMQServer.setHAPolicy(policy.getBackupPolicy());
activeMQServer.start();
}
}
private void asyncStopServer() {
if (stoppingServer) {
return;
}
synchronized (this) {
if (stoppingServer) {
return;
}
stoppingServer = true;
new Thread(() -> {
try {
activeMQServer.stop();
} catch (Exception e) {
ActiveMQServerLogger.LOGGER.errorRestartingBackupServer(e, activeMQServer);
}
}).start();
}
}
@Override
public void onUnavailableLockEvent() {
LOGGER.error("Quorum UNAVAILABLE: async stopping broker.");
asyncStopServer();
}
private final class ReplicationFailureListener implements FailureListener, CloseListener {
@Override
public void connectionFailed(ActiveMQException exception, boolean failedOver) {
onReplicationConnectionClose();
}
@Override
public void connectionFailed(final ActiveMQException me, boolean failedOver, String scaleDownTargetNodeID) {
connectionFailed(me, failedOver);
}
@Override
public void connectionClosed() {
onReplicationConnectionClose();
}
}
private void onReplicationConnectionClose() {
ExecutorService executorService = activeMQServer.getThreadPool();
if (executorService != null) {
synchronized (replicationLock) {
if (replicationManager == null) {
return;
}
}
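// the same check is repeated inside the submitted task: replication may be stopped concurrently before it runs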
executorService.execute(() -> {
synchronized (replicationLock) {
if (replicationManager == null) {
return;
}
// this is going to stop the replication manager
activeMQServer.getStorageManager().stopReplication();
assert !replicationManager.isStarted();
replicationManager = null;
}
});
}
}
@Override
public void close(boolean permanently, boolean restarting) throws Exception {
synchronized (replicationLock) {
replicationManager = null;
}
distributedManager.stop();
// To avoid an NPE caused by the stop
final NodeManager nodeManager = activeMQServer.getNodeManager();
if (nodeManager != null) {
if (permanently) {
nodeManager.crashLiveServer();
} else {
nodeManager.pauseLiveServer();
}
}
}
@Override
public void sendLiveIsStopping() {
final ReplicationManager replicationManager = getReplicationManager();
if (replicationManager == null) {
return;
}
replicationManager.sendLiveIsStopping(ReplicationLiveIsStoppingMessage.LiveStopping.STOP_CALLED);
// this pool gets a 'hard' shutdown, no need to manage the Future of this Runnable.
activeMQServer.getScheduledPool().schedule(replicationManager::clearReplicationTokens, 30, TimeUnit.SECONDS);
}
@Override
public ReplicationManager getReplicationManager() {
synchronized (replicationLock) {
return replicationManager;
}
}
@Override
public boolean isReplicaSync() {
final ReplicationManager replicationManager = getReplicationManager();
if (replicationManager == null) {
return false;
}
return !replicationManager.isSynchronizing();
}
}

View File

@ -32,6 +32,7 @@ import org.apache.activemq.artemis.core.postoffice.PostOffice;
import org.apache.activemq.artemis.core.protocol.core.Channel;
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint.ReplicationEndpointEventListener;
import org.apache.activemq.artemis.core.server.ActivationParams;
import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
@ -54,7 +55,7 @@ import static org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothi
import static org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum.BACKUP_ACTIVATION.FAIL_OVER;
import static org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum.BACKUP_ACTIVATION.STOP;
public final class SharedNothingBackupActivation extends Activation {
public final class SharedNothingBackupActivation extends Activation implements ReplicationEndpointEventListener {
private static final Logger logger = Logger.getLogger(SharedNothingBackupActivation.class);
@ -96,7 +97,7 @@ public final class SharedNothingBackupActivation extends Activation {
assert replicationEndpoint == null;
activeMQServer.resetNodeManager();
backupUpToDate = false;
replicationEndpoint = new ReplicationEndpoint(activeMQServer, ioCriticalErrorListener, attemptFailBack, this);
replicationEndpoint = new ReplicationEndpoint(activeMQServer, attemptFailBack, this);
}
@Override
@ -156,9 +157,6 @@ public final class SharedNothingBackupActivation extends Activation {
logger.debug("Starting backup manager");
activeMQServer.getBackupManager().start();
logger.debug("Set backup Quorum");
replicationEndpoint.setBackupQuorum(backupQuorum);
replicationEndpoint.setExecutor(activeMQServer.getExecutorFactory().getExecutor());
EndpointConnector endpointConnector = new EndpointConnector();
@ -461,7 +459,13 @@ public final class SharedNothingBackupActivation extends Activation {
return backupUpToDate;
}
public void setRemoteBackupUpToDate() {
@Override
public void onLiveNodeId(String nodeId) {
backupQuorum.liveIDSet(nodeId);
}
@Override
public void onRemoteBackupUpToDate() {
activeMQServer.getBackupManager().announceBackup();
backupUpToDate = true;
backupSyncLatch.countDown();
@ -470,7 +474,8 @@ public final class SharedNothingBackupActivation extends Activation {
/**
* @throws ActiveMQException
*/
public void remoteFailOver(ReplicationLiveIsStoppingMessage.LiveStopping finalMessage) throws ActiveMQException {
@Override
public void onLiveStopping(ReplicationLiveIsStoppingMessage.LiveStopping finalMessage) throws ActiveMQException {
if (logger.isTraceEnabled()) {
logger.trace("Remote fail-over, got message=" + finalMessage + ", backupUpToDate=" +
backupUpToDate);
@ -526,4 +531,9 @@ public final class SharedNothingBackupActivation extends Activation {
return replicationEndpoint;
}
}
@Override
public boolean isReplicaSync() {
return isRemoteBackupUpToDate();
}
}

View File

@ -462,4 +462,13 @@ public class SharedNothingLiveActivation extends LiveActivation {
private TransportConfiguration[] connectorNameListToArray(final List<String> connectorNames) {
return activeMQServer.getConfiguration().getTransportConfigurations(connectorNames);
}
@Override
public boolean isReplicaSync() {
final ReplicationManager replicationManager = getReplicationManager();
if (replicationManager == null) {
return false;
}
return !replicationManager.isSynchronizing();
}
}

View File

@ -2605,7 +2605,7 @@
</xsd:annotation>
<xsd:complexType>
<xsd:sequence>
<xsd:element name="data-source-property" type="dataSourcePropertyType" minOccurs="1" maxOccurs="unbounded">
<xsd:element name="data-source-property" type="propertyType" minOccurs="1" maxOccurs="unbounded">
<xsd:annotation>
<xsd:documentation>
A key-value pair option for the DataSource
@ -2682,7 +2682,7 @@
<xsd:attributeGroup ref="xml:specialAttrs"/>
</xsd:complexType>
<xsd:complexType name="dataSourcePropertyType">
<xsd:complexType name="propertyType">
<xsd:attribute name="key" type="xsd:string" use="required">
<xsd:annotation>
<xsd:documentation>
@ -2726,6 +2726,36 @@
<xsd:attributeGroup ref="xml:specialAttrs"/>
</xsd:complexType>
<xsd:complexType name="distributed-primitive-manager">
<xsd:all>
<xsd:element name="class-name" type="xsd:string" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
The distributed-primitive-manager class name
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="properties" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
A list of options for the distributed-primitive-manager
</xsd:documentation>
</xsd:annotation>
<xsd:complexType>
<xsd:sequence>
<xsd:element name="property" type="propertyType" minOccurs="1" maxOccurs="unbounded">
<xsd:annotation>
<xsd:documentation>
A key-value pair option for the distributed-primitive-manager
</xsd:documentation>
</xsd:annotation>
</xsd:element>
</xsd:sequence>
</xsd:complexType>
</xsd:element>
</xsd:all>
</xsd:complexType>
<xsd:complexType name="haReplicationType">
<xsd:choice>
<xsd:element name="master" type="replicatedPolicyType" minOccurs="0" maxOccurs="1">
@ -2749,6 +2779,20 @@
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="primary" type="asyncPrimaryPolicyType" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
A primary server configured to replicate.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="backup" type="asyncBackupPolicyType" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
A backup server configured to replicate.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
</xsd:choice>
<xsd:attributeGroup ref="xml:specialAttrs"/>
</xsd:complexType>
@ -3119,6 +3163,155 @@
</xsd:all>
<xsd:attributeGroup ref="xml:specialAttrs"/>
</xsd:complexType>
<xsd:complexType name="asyncPrimaryPolicyType">
<xsd:all>
<xsd:element name="manager" type="distributed-primitive-manager" minOccurs="1" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
The manager used to manage the distributed locks used for this type of replication.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="group-name" type="xsd:string" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
Used for replication. If set, (remote) backup servers will only pair with live servers with a matching
group-name
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="cluster-name" type="xsd:string" maxOccurs="1" minOccurs="0">
<xsd:annotation>
<xsd:documentation>
Name of the cluster configuration to use for replication. This setting is only necessary in case you
configure multiple cluster connections. It is used by replicating backups and by live servers that
may attempt fail-back.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="check-for-live-server" type="xsd:boolean" default="false" maxOccurs="1" minOccurs="0">
<xsd:annotation>
<xsd:documentation>
Whether to check the cluster for a (live) server using our own server ID when starting
up. This option is only necessary for performing 'fail-back' on replicating
servers. Strictly speaking this setting only applies to live servers and not to
backups.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="initial-replication-sync-timeout" type="xsd:long" default="30000" maxOccurs="1"
minOccurs="0">
<xsd:annotation>
<xsd:documentation>
The amount of time to wait for the replica to acknowledge it has received all the necessary data from
the replicating server at the final step of the initial replication synchronization process.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="vote-retries" type="xsd:integer" default="12" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
If we start as a replica and lose connection to the master, how many times should we attempt to vote
for quorum before restarting
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="vote-retry-wait" type="xsd:long" default="2000" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
How long to wait (in milliseconds) between each vote
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="retry-replication-wait" type="xsd:long" default="2000" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
If we start as a replica, how long to wait (in milliseconds) before trying to replicate again after failing to find a replica
</xsd:documentation>
</xsd:annotation>
</xsd:element>
</xsd:all>
<xsd:attributeGroup ref="xml:specialAttrs"/>
</xsd:complexType>
<xsd:complexType name="asyncBackupPolicyType">
<xsd:all>
<xsd:element name="manager" type="distributed-primitive-manager" minOccurs="1" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
The manager used to manage the distributed locks used for this type of replication.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="group-name" type="xsd:string" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
Used for replication. If set, (remote) backup servers will only pair with live servers with a matching
group-name
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="cluster-name" type="xsd:string" maxOccurs="1" minOccurs="0">
<xsd:annotation>
<xsd:documentation>
Name of the cluster configuration to use for replication. This setting is only necessary in case you
configure multiple cluster connections. It is used by replicating backups and by live servers that
may attempt fail-back.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="max-saved-replicated-journals-size" type="xsd:int" default="2" maxOccurs="1" minOccurs="0">
<xsd:annotation>
<xsd:documentation>
This specifies how many times a replicated backup server can restart after moving its files on start.
Once there are this number of backup journal files the server will stop permanently after it fails
back.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="allow-failback" type="xsd:boolean" default="true" maxOccurs="1" minOccurs="0">
<xsd:annotation>
<xsd:documentation>
Whether a server will automatically stop when another places a request to take over
its place. The use case is when a regular server stops and its backup takes over its
duties; later the main server restarts and requests the server (the former backup) to
stop operating.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="initial-replication-sync-timeout" type="xsd:long" default="30000" maxOccurs="1"
minOccurs="0">
<xsd:annotation>
<xsd:documentation>
If we have to start as a replicated server this is the amount of time to wait for the replica to
acknowledge it has received all the necessary data from the replicating server at the final step
of the initial replication synchronization process.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="vote-retries" type="xsd:integer" default="12" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
If we lose connection to the master, how many times should we attempt to vote for quorum before restarting
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="vote-retry-wait" type="xsd:long" default="2000" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
How long to wait (in milliseconds) between each vote
</xsd:documentation>
</xsd:annotation>
</xsd:element>
<xsd:element name="retry-replication-wait" type="xsd:long" default="2000" minOccurs="0" maxOccurs="1">
<xsd:annotation>
<xsd:documentation>
How long to wait (in milliseconds) before trying to replicate again after failing to find a replica
</xsd:documentation>
</xsd:annotation>
</xsd:element>
</xsd:all>
<xsd:attributeGroup ref="xml:specialAttrs"/>
</xsd:complexType>
<xsd:complexType name="colocatedReplicaPolicyType">
<xsd:all>
<xsd:element name="group-name" type="xsd:string" minOccurs="0" maxOccurs="1">

View File

@ -17,7 +17,12 @@
package org.apache.activemq.artemis.core.config.impl;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.FileDeploymentManager;
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
@ -27,6 +32,8 @@ import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.LiveOnlyPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicaPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicatedPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ScaleDownPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreMasterPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreSlavePolicy;
@ -35,11 +42,19 @@ import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.ColocatedActivation;
import org.apache.activemq.artemis.core.server.impl.FileLockNodeManager;
import org.apache.activemq.artemis.core.server.impl.LiveOnlyActivation;
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
import org.apache.activemq.artemis.core.server.impl.ReplicationPrimaryActivation;
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
import org.apache.activemq.artemis.core.server.impl.SharedNothingLiveActivation;
import org.apache.activemq.artemis.core.server.impl.SharedStoreBackupActivation;
import org.apache.activemq.artemis.core.server.impl.SharedStoreLiveActivation;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.MutableLong;
import org.apache.activemq.artemis.quorum.UnavailableStateException;
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
import org.hamcrest.MatcherAssert;
import org.hamcrest.core.IsInstanceOf;
import org.junit.Test;
import static org.hamcrest.CoreMatchers.instanceOf;
@ -124,6 +139,248 @@ public class HAPolicyConfigurationTest extends ActiveMQTestBase {
liveOnlyTest("live-only-hapolicy-config5.xml");
}
public static class FakeDistributedPrimitiveManager implements DistributedPrimitiveManager {
private final Map<String, String> config;
private boolean started;
private DistributedLock lock;
public FakeDistributedPrimitiveManager(Map<String, String> config) {
this.config = config;
this.started = false;
}
public Map<String, String> getConfig() {
return config;
}
@Override
public void addUnavailableManagerListener(UnavailableManagerListener listener) {
// no op
}
@Override
public void removeUnavailableManagerListener(UnavailableManagerListener listener) {
// no op
}
@Override
public boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException {
started = true;
return true;
}
@Override
public void start() throws InterruptedException, ExecutionException {
started = true;
}
@Override
public boolean isStarted() {
return started;
}
@Override
public void stop() {
started = false;
if (lock != null) {
lock.close();
}
lock = null;
}
@Override
public DistributedLock getDistributedLock(String lockId) {
if (!started) {
throw new IllegalStateException("need to start first");
}
if (lock == null) {
lock = new DistributedLock() {
private boolean held;
@Override
public String getLockId() {
return lockId;
}
@Override
public boolean isHeldByCaller() throws UnavailableStateException {
return held;
}
@Override
public boolean tryLock() throws UnavailableStateException, InterruptedException {
if (held) {
return false;
}
held = true;
return true;
}
@Override
public void unlock() throws UnavailableStateException {
held = false;
}
@Override
public void addListener(UnavailableLockListener listener) {
}
@Override
public void removeListener(UnavailableLockListener listener) {
}
@Override
public void close() {
held = false;
}
};
} else if (!lock.getLockId().equals(lockId)) {
throw new IllegalStateException("This shouldn't happen");
}
return lock;
}
@Override
public MutableLong getMutableLong(String mutableLongId) throws InterruptedException, ExecutionException, TimeoutException {
// TODO
return null;
}
@Override
public void close() {
stop();
}
}
private static void validateManagerConfig(Map<String, String> config) {
assertEquals("127.0.0.1:6666", config.get("connect-string"));
assertEquals("16000", config.get("session-ms"));
assertEquals("2000", config.get("connection-ms"));
assertEquals("2", config.get("retries"));
assertEquals("2000", config.get("retries-ms"));
assertEquals("test", config.get("namespace"));
assertEquals("10", config.get("session-percent"));
assertEquals(7, config.size());
}
@Test
public void PrimaryReplicationTest() throws Exception {
Configuration configuration = createConfiguration("primary-hapolicy-config.xml");
ActiveMQServerImpl server = new ActiveMQServerImpl(configuration);
try {
server.start();
Activation activation = server.getActivation();
assertTrue(activation instanceof ReplicationPrimaryActivation);
HAPolicy haPolicy = server.getHAPolicy();
assertTrue(haPolicy instanceof ReplicationPrimaryPolicy);
ReplicationPrimaryPolicy policy = (ReplicationPrimaryPolicy) haPolicy;
assertFalse(policy.isAllowAutoFailBack());
assertEquals(9876, policy.getInitialReplicationSyncTimeout());
assertFalse(policy.canScaleDown());
assertFalse(policy.isBackup());
assertFalse(policy.isSharedStore());
assertTrue(policy.isCheckForLiveServer());
assertTrue(policy.isWaitForActivation());
assertEquals("purple", policy.getGroupName());
assertEquals("purple", policy.getBackupGroupName());
assertEquals("abcdefg", policy.getClusterName());
assertFalse(policy.useQuorumManager());
// check failback companion backup policy
ReplicationBackupPolicy failbackPolicy = policy.getBackupPolicy();
assertNotNull(failbackPolicy);
assertSame(policy, failbackPolicy.getLivePolicy());
assertEquals(policy.getGroupName(), failbackPolicy.getGroupName());
assertEquals(policy.getBackupGroupName(), failbackPolicy.getBackupGroupName());
assertEquals(policy.getClusterName(), failbackPolicy.getClusterName());
assertEquals(failbackPolicy.getMaxSavedReplicatedJournalsSize(), ActiveMQDefaultConfiguration.getDefaultMaxSavedReplicatedJournalsSize());
assertEquals(1, failbackPolicy.getVoteRetries());
assertEquals(1000, failbackPolicy.getVoteRetryWait());
assertTrue(failbackPolicy.isTryFailback());
assertTrue(failbackPolicy.isBackup());
assertFalse(failbackPolicy.isSharedStore());
assertTrue(failbackPolicy.isWaitForActivation());
assertFalse(failbackPolicy.useQuorumManager());
assertEquals(12345, failbackPolicy.getRetryReplicationWait());
// check scale-down properties
assertFalse(failbackPolicy.canScaleDown());
assertNull(failbackPolicy.getScaleDownClustername());
assertNull(failbackPolicy.getScaleDownGroupName());
// validate manager
DistributedPrimitiveManager manager = ((ReplicationPrimaryActivation) activation).getDistributedManager();
assertNotNull(manager);
assertEquals(FakeDistributedPrimitiveManager.class.getName(), manager.getClass().getName());
MatcherAssert.assertThat(manager, IsInstanceOf.instanceOf(FakeDistributedPrimitiveManager.class));
FakeDistributedPrimitiveManager forwardingManager = (FakeDistributedPrimitiveManager) manager;
// validate manager config
validateManagerConfig(forwardingManager.getConfig());
} finally {
server.stop();
}
}
@Test
public void BackupReplicationTest() throws Exception {
Configuration configuration = createConfiguration("backup-hapolicy-config.xml");
ActiveMQServerImpl server = new ActiveMQServerImpl(configuration);
try {
server.start();
Activation activation = server.getActivation();
assertTrue(activation instanceof ReplicationBackupActivation);
HAPolicy haPolicy = server.getHAPolicy();
assertTrue(haPolicy instanceof ReplicationBackupPolicy);
ReplicationBackupPolicy policy = (ReplicationBackupPolicy) haPolicy;
assertEquals("tiddles", policy.getGroupName());
assertEquals("tiddles", policy.getBackupGroupName());
assertEquals("33rrrrr", policy.getClusterName());
assertEquals(22, policy.getMaxSavedReplicatedJournalsSize());
assertEquals(1, policy.getVoteRetries());
assertEquals(1000, policy.getVoteRetryWait());
assertFalse(policy.isTryFailback());
assertTrue(policy.isBackup());
assertFalse(policy.isSharedStore());
assertTrue(policy.isWaitForActivation());
assertFalse(policy.useQuorumManager());
assertEquals(12345, policy.getRetryReplicationWait());
// check scale-down properties
assertFalse(policy.canScaleDown());
assertNull(policy.getScaleDownClustername());
assertNull(policy.getScaleDownGroupName());
// check failover companion live policy
ReplicationPrimaryPolicy failoverLivePolicy = policy.getLivePolicy();
assertNotNull(failoverLivePolicy);
assertSame(policy, failoverLivePolicy.getBackupPolicy());
assertFalse(failoverLivePolicy.isAllowAutoFailBack());
assertEquals(9876, failoverLivePolicy.getInitialReplicationSyncTimeout());
assertFalse(failoverLivePolicy.canScaleDown());
assertFalse(failoverLivePolicy.isBackup());
assertFalse(failoverLivePolicy.isSharedStore());
assertFalse(failoverLivePolicy.isCheckForLiveServer());
assertTrue(failoverLivePolicy.isWaitForActivation());
assertEquals(policy.getGroupName(), failoverLivePolicy.getGroupName());
assertEquals(policy.getClusterName(), failoverLivePolicy.getClusterName());
assertEquals(policy.getBackupGroupName(), failoverLivePolicy.getBackupGroupName());
assertFalse(failoverLivePolicy.useQuorumManager());
// check scale-down properties
assertFalse(failoverLivePolicy.canScaleDown());
assertNull(failoverLivePolicy.getScaleDownClustername());
assertNull(failoverLivePolicy.getScaleDownGroupName());
// validate manager
DistributedPrimitiveManager manager = ((ReplicationBackupActivation) activation).getDistributedManager();
assertNotNull(manager);
assertEquals(FakeDistributedPrimitiveManager.class.getName(), manager.getClass().getName());
MatcherAssert.assertThat(manager, IsInstanceOf.instanceOf(FakeDistributedPrimitiveManager.class));
FakeDistributedPrimitiveManager forwardingManager = (FakeDistributedPrimitiveManager) manager;
// validate manager config
validateManagerConfig(forwardingManager.getConfig());
} finally {
server.stop();
}
}
@Test
public void ReplicatedTest() throws Exception {
Configuration configuration = createConfiguration("replicated-hapolicy-config.xml");

View File

@ -113,6 +113,7 @@ import org.apache.activemq.artemis.core.remoting.impl.invm.TransportConstants;
import org.apache.activemq.artemis.core.remoting.impl.netty.NettyAcceptorFactory;
import org.apache.activemq.artemis.core.remoting.impl.netty.NettyConnector;
import org.apache.activemq.artemis.core.remoting.impl.netty.NettyConnectorFactory;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.server.ActiveMQComponent;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
@ -129,6 +130,7 @@ import org.apache.activemq.artemis.core.server.impl.Activation;
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.AddressInfo;
import org.apache.activemq.artemis.core.server.impl.LiveOnlyActivation;
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
import org.apache.activemq.artemis.core.settings.impl.AddressFullMessagePolicy;
import org.apache.activemq.artemis.core.settings.impl.AddressSettings;
@ -1384,6 +1386,8 @@ public abstract class ActiveMQTestBase extends Assert {
if (isReplicated) {
if (activation instanceof SharedNothingBackupActivation) {
isRemoteUpToDate = backup.isReplicaSync();
} else if (activation instanceof ReplicationBackupActivation) {
isRemoteUpToDate = backup.isReplicaSync();
} else {
//we may have already failed over and changed the Activation
if (actualServer.isStarted()) {
@ -2517,6 +2521,17 @@ public abstract class ActiveMQTestBase extends Assert {
return !hadToInterrupt;
}
protected static ReplicationEndpoint getReplicationEndpoint(ActiveMQServer server) {
final Activation activation = server.getActivation();
if (activation instanceof SharedNothingBackupActivation) {
return ((SharedNothingBackupActivation) activation).getReplicationEndpoint();
}
if (activation instanceof ReplicationBackupActivation) {
return ((ReplicationBackupActivation) activation).getReplicationEndpoint();
}
return null;
}
// Private -------------------------------------------------------
// Inner classes -------------------------------------------------

View File

@ -0,0 +1,54 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration
xmlns="urn:activemq"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="urn:activemq /schema/artemis-server.xsd">
<core xmlns="urn:activemq:core">
<discovery-groups>
<discovery-group name="wahey"/>
</discovery-groups>
<ha-policy>
<replication>
<backup>
<group-name>tiddles</group-name>
<max-saved-replicated-journals-size>22</max-saved-replicated-journals-size>
<cluster-name>33rrrrr</cluster-name>
<initial-replication-sync-timeout>9876</initial-replication-sync-timeout>
<retry-replication-wait>12345</retry-replication-wait>
<vote-retries>1</vote-retries>
<vote-retry-wait>1000</vote-retry-wait>
<allow-failback>false</allow-failback>
<manager>
<class-name>
org.apache.activemq.artemis.core.config.impl.HAPolicyConfigurationTest$FakeDistributedPrimitiveManager
</class-name>
<properties>
<property key="connect-string" value="127.0.0.1:6666"/>
<property key="session-ms" value="16000"/>
<property key="connection-ms" value="2000"/>
<property key="retries" value="2"/>
<property key="retries-ms" value="2000"/>
<property key="namespace" value="test"/>
<property key="session-percent" value="10"/>
</properties>
</manager>
</backup>
</replication>
</ha-policy>
</core>
</configuration>

View File

@ -0,0 +1,52 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration
xmlns="urn:activemq"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="urn:activemq /schema/artemis-server.xsd">
<core xmlns="urn:activemq:core">
<ha-policy>
<replication>
<primary>
<group-name>purple</group-name>
<cluster-name>abcdefg</cluster-name>
<initial-replication-sync-timeout>9876</initial-replication-sync-timeout>
<retry-replication-wait>12345</retry-replication-wait>
<check-for-live-server>true</check-for-live-server>
<vote-retries>1</vote-retries>
<vote-retry-wait>1000</vote-retry-wait>
<manager>
<class-name>
org.apache.activemq.artemis.core.config.impl.HAPolicyConfigurationTest$FakeDistributedPrimitiveManager
</class-name>
<properties>
<property key="connect-string" value="127.0.0.1:6666"/>
<property key="session-ms" value="16000"/>
<property key="connection-ms" value="2000"/>
<property key="retries" value="2"/>
<property key="retries-ms" value="2000"/>
<property key="namespace" value="test"/>
<property key="session-percent" value="10"/>
</properties>
</manager>
</primary>
</replication>
</ha-policy>
</core>
</configuration>

View File

@ -98,6 +98,36 @@ or
</ha-policy>
```
*Replication* also allows configuring 2 new roles to enable a *pluggable quorum* provider configuration, by using:
```xml
<ha-policy>
<replication>
<primary/>
</replication>
</ha-policy>
```
to configure the classic *master* role, and
```xml
<ha-policy>
<replication>
<backup/>
</replication>
</ha-policy>
```
for the classic *slave* one.
If *replication* is configured using these new roles, some additional elements are required to complete the configuration, as detailed later.
### IMPORTANT NOTE ON PLUGGABLE QUORUM VOTE FEATURE
This feature is still **EXPERIMENTAL** and not meant to be run in production yet.
It means:
- its configuration can change until it is declared **officially stable**
- it still has to solve an inherent data misalignment issue with replication (which can happen with `classic` replication as well)
More info about this issue is available at [ARTEMIS-3340](https://issues.apache.org/jira/browse/ARTEMIS-3340).
### Data Replication
When using replication, the live and the backup servers do not share the
@ -199,16 +229,26 @@ Much like in the shared-store case, when the live server stops or
crashes, its replicating backup will become active and take over its
duties. Specifically, the backup will become active when it loses
connection to its live server. This can be problematic because this can
also happen because of a temporary network problem. In order to address
this issue, the backup will try to determine whether it still can
also happen because of a temporary network problem.
This issue is solved in 2 different ways, depending on which replication roles are configured:
- **classic replication** (`master`/`slave` roles): the backup will try to determine whether it can still
connect to the other servers in the cluster. If it can connect to more
than half the servers, it will become active; if more than half the
servers also disappeared with the live, the backup will wait and try
reconnecting with the live. This avoids a split-brain situation.
- **pluggable quorum vote replication** (`primary`/`backup` roles): the backup relies on a pluggable quorum provider
(configurable via the `manager` xml element) to detect whether there is any active live; a minimal sketch of the underlying quorum API is shown after the note below.
> ***NOTE***
>
> A backup in the **pluggable quorum vote replication** still needs to carefully configure
> [connection-ttl](connection-ttl.md) in order to promptly issue a request to the quorum service to become live
> before failing over.
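The contract the backup relies on is the quorum API introduced by this commit, `DistributedPrimitiveManager` and `DistributedLock`. The snippet below is only a minimal sketch of how that API is meant to be used, not the broker's activation code: the `QuorumApiSketch` class and the `tryBecomeLive` helper are made-up names, and the listener lambda assumes `UnavailableLockListener` is a single-method interface.
```java
import java.util.concurrent.TimeUnit;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
public final class QuorumApiSketch {
   // Hypothetical helper: become live only if the quorum provider grants the per-node-id lock.
   public static boolean tryBecomeLive(DistributedPrimitiveManager manager,
                                       String nodeId,
                                       long timeoutMillis) throws Exception {
      // first reach the majority of the quorum nodes
      if (!manager.start(timeoutMillis, TimeUnit.MILLISECONDS)) {
         return false;
      }
      final DistributedLock liveLock = manager.getDistributedLock(nodeId);
      // get notified if the quorum service becomes unavailable while live
      liveLock.addListener(() -> System.err.println("Quorum unavailable: the broker should stop"));
      // only one broker at a time can hold the live lock for this node id
      return liveLock.tryLock(timeoutMillis, TimeUnit.MILLISECONDS);
   }
}
```
Under this model, activation is gated by lock ownership rather than by counting reachable cluster members.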
#### Configuration
To configure the live and backup servers to be a replicating pair,
To configure a classic replication's live and backup servers to be a replicating pair,
configure the live server in `broker.xml` to have:
```xml
@ -235,6 +275,30 @@ The backup server must be similarly configured but as a `slave`
</ha-policy>
```
To configure a pluggable quorum replication's primary and backup instead:
```xml
<ha-policy>
<replication>
<primary/>
</replication>
</ha-policy>
...
<cluster-connections>
<cluster-connection name="my-cluster">
...
</cluster-connection>
</cluster-connections>
```
and
```xml
<ha-policy>
<replication>
<backup/>
</replication>
</ha-policy>
```
#### All Replication Configuration
The following table lists all the `ha-policy` configuration elements for
@ -308,6 +372,142 @@ replica to acknowledge it has received all the necessary data. The
default is 30,000 milliseconds. **Note:** during this interval any
journal related operations will be blocked.
#### Pluggable Quorum Vote Replication configurations
Pluggable Quorum Vote replication configuration options are a bit different
from classic replication, mostly because of its customizable nature.
[Apache Curator](https://curator.apache.org/) is used by the default quorum provider.
Below are some example configurations that show how it works.
For `primary`:
```xml
<ha-policy>
<replication>
<primary>
<manager>
<class-name>org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager</class-name>
<properties>
<property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
</properties>
</manager>
<check-for-live-server>true</check-for-live-server>
</primary>
</replication>
</ha-policy>
```
And `backup`:
```xml
<ha-policy>
<replication>
<backup>
<manager>
<class-name>org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager</class-name>
<properties>
<property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
</properties>
</manager>
<allow-failback>true</allow-failback>
</backup>
</replication>
</ha-policy>
```
Configuring `class-name` as follows
```xml
<class-name>org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager</class-name>
```
isn't really needed, because the Apache Curator provider is the default, but it is shown here for completeness.
The `properties` element, instead,
```xml
<properties>
<property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
</properties>
```
can specify a list of `property` elements in the form of key-value pairs, depending on the ones
accepted by the specified `class-name` provider.
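Because `class-name` can point to any implementation of the `DistributedPrimitiveManager` interface shipped with this commit, a custom provider can be plugged in. The skeleton below is a hypothetical sketch only: it assumes, as the configuration test double in this commit does, that the implementation is instantiated with a `Map<String, String>` carrying the configured `property` key/value pairs.
```java
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.activemq.artemis.quorum.DistributedLock;
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
import org.apache.activemq.artemis.quorum.MutableLong;
// Hypothetical custom quorum provider skeleton: the method bodies are placeholders.
public final class MyQuorumManager implements DistributedPrimitiveManager {
   private final Map<String, String> config;
   private volatile boolean started;
   // the <properties> of the <manager> element arrive as a key/value map
   public MyQuorumManager(Map<String, String> config) {
      this.config = config;
   }
   @Override
   public void addUnavailableManagerListener(UnavailableManagerListener listener) {
      // notify this listener when the coordination service becomes unavailable
   }
   @Override
   public void removeUnavailableManagerListener(UnavailableManagerListener listener) {
   }
   @Override
   public boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException {
      // connect to the external coordination service here
      started = true;
      return true;
   }
   @Override
   public void start() throws InterruptedException, ExecutionException {
      started = true;
   }
   @Override
   public boolean isStarted() {
      return started;
   }
   @Override
   public void stop() {
      started = false;
   }
   @Override
   public DistributedLock getDistributedLock(String lockId) {
      throw new UnsupportedOperationException("lock management is left out of this sketch");
   }
   @Override
   public MutableLong getMutableLong(String mutableLongId) throws InterruptedException, ExecutionException, TimeoutException {
      throw new UnsupportedOperationException("left out of this sketch");
   }
   @Override
   public void close() {
      stop();
   }
}
```
The fully-qualified name of such a class would then go into the `class-name` element shown above.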
The Apache Curator provider allows configuring these properties (a sketch of how they map onto Curator's builder follows the list):
- [`connect-string`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#connectString(java.lang.String)): (no default)
- [`session-ms`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#sessionTimeoutMs(int)): (default is 18000 ms)
- [`session-percent`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#simulatedSessionExpirationPercent(int)): (default is 33); should be <= default,
see https://cwiki.apache.org/confluence/display/CURATOR/TN14 for more info
- [`connection-ms`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#connectionTimeoutMs(int)): (default is 8000 ms)
- [`retries`](https://curator.apache.org/apidocs/org/apache/curator/retry/RetryNTimes.html#%3Cinit%3E(int,int)): (default is 1)
- [`retries-ms`](https://curator.apache.org/apidocs/org/apache/curator/retry/RetryNTimes.html#%3Cinit%3E(int,int)): (default is 1000 ms)
- [`namespace`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#namespace(java.lang.String)): (no default)
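To make the meaning of these keys more concrete, the sketch below shows how they would map onto Apache Curator's builder, using the APIs linked above; it is an illustration only and not how the broker's provider is implemented (and `simulatedSessionExpirationPercent` requires a Curator version that provides it).
```java
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.retry.RetryNTimes;
// Hypothetical mapping of the properties above onto Apache Curator's builder.
final class CuratorMappingSketch {
   static CuratorFramework build() {
      return CuratorFrameworkFactory.builder()
         .connectString("127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668") // connect-string
         .sessionTimeoutMs(18_000)                                      // session-ms
         .simulatedSessionExpirationPercent(33)                         // session-percent
         .connectionTimeoutMs(8_000)                                    // connection-ms
         .retryPolicy(new RetryNTimes(1, 1_000))                        // retries, retries-ms
         .namespace("test")                                             // namespace
         .build();
   }
}
```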
Configuration of the [Apache Zookeeper](https://zookeeper.apache.org/) nodes is left to the user, but there are a few
**suggestions to improve the reliability of the quorum service**:
- broker `session_ms` must be `>= 2 * server tick time` and `<= 20 * server tick time` as by
[Zookeeper 3.6.3 admin guide](https://zookeeper.apache.org/doc/r3.6.3/zookeeperAdmin.html): it directly impacts how fast a backup
can failover to an isolated/killed/unresponsive live; the higher, the slower.
- GC on broker machine should allow keeping GC pauses within 1/3 of `session_ms` in order to let the Zookeeper heartbeat protocol
to work reliably: if it's not possible, better increase `session_ms` accepting a slower failover
- Zookeeper must have enough resources to keep GC (and OS) pauses much smaller than server tick time: please consider carefully if
broker and Zookeeper node should share the same physical machine, depending on the expected load of the broker
- network isolation protection requires configuring >=3 Zookeeper nodes
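As a worked example of the first suggestion, assuming a ZooKeeper ensemble running with the common `tickTime=2000` (2000 ms, an assumption for illustration only), `session-ms` must fall between `2 * 2000 = 4000 ms` and `20 * 2000 = 40000 ms`, so the default of 18000 ms already satisfies the bound:
```xml
<manager>
   <properties>
      <!-- assuming tickTime=2000 ms on the Zookeeper servers:
           4000 ms <= session-ms <= 40000 ms, hence the default 18000 ms is valid -->
      <property key="session-ms" value="18000"/>
   </properties>
</manager>
```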
#### *Important*: Notes on pluggable quorum replication configuration
The first `classic` replication configuration that doesn't apply to pluggable quorum replication
is `vote-on-replication-failure`, and configuring it produces a startup error: pluggable quorum replication
always behaves as if `vote-on-replication-failure` were `true`, i.e. it shuts down a live broker (and its JVM) in case of quorum loss.
The second deprecated `classic` replication configuration is `quorum-vote-wait`: given that pluggable quorum vote replication
requires the backup to have an always-on, reliable quorum service, there's no need to specify a timeout to reach
the majority of quorum nodes. A backup remains inactive (i.e. JVM still up, console too, but unable to sync with the live or to fail over)
until the majority of quorum nodes is reachable again, re-activating when that happens.
The only exception is a primary failing back to an existing live backup using `<allow-failback>true</allow-failback>`:
if the quorum service isn't immediately available, the primary (and its JVM) just stops, allowing a fail-fast fail-back.
There are a few *semantic differences* in other existing properties:
- `vote-retry-wait`: in `classic` replication it means how long to wait between each quorum vote attempt, while with pluggable quorum replication
it means how long to request to fail over on each attempt
- `vote-retries`: differently from `classic`, the number of vote attempts is `1 + vote-retries` (with classic it is just `vote-retries`).
Setting `0` means no retries, leaving the backup to still perform a single initial attempt.
**Notes on replication configuration with the [Apache Curator](https://curator.apache.org/) quorum provider**
As mentioned some paragraphs above, `session-ms` affects the failover duration: a backup can
fail over after `session-ms` expires or, if the live broker voluntarily gives up its role
(e.g. during a fail-back or a manual broker stop), immediately.
For the former case (session expiration with live no longer present), the backup broker can detect an unresponsive live by using:
1. cluster connection PINGs (affected by [connection-ttl](connection-ttl.md) tuning)
2. closed TCP connection notification (depends on TCP configuration and the networking stack/topology)
These 2 cases have different failover durations, depending on different factors:
1. `connection-ttl` affects how much of the expiring `session-ms` is spent just detecting a missing live broker: the higher `connection-ttl`,
the slower it reacts; the backup can attempt to fail over for the remaining `session-ms - connection-ttl`
2. a closed TCP connection is detected immediately: the backup must still try to fail over for >= `session-ms` to be sure to catch
the session expiration and complete failover
The previous comments are meant to suggest to the careful reader that the minimum time to attempt to fail over
cannot be lower than the full `session-ms` expiration.
In short, it means
```
total failover attempt time > session-ms
```
with
```
total failover attempt time = vote-retry-wait * (vote-retries + 1)
```
and, as a consequence:
```
vote-retry-wait * (vote-retries + 1) > session-ms
```
For example, with `session-ms = 18000 ms`, safe values for the failover timeout are:
```xml
<vote-retries>11</vote-retries>
<vote-retry-wait>2000</vote-retry-wait>
```
Because `11 * 2000 = 22000 ms` that's bigger then `18000 ms`.
There's no risk that a backup broker will early stop attempting to failover, losing its chance to become live.
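Putting the numbers above together, here is a sketch of a pluggable quorum `backup` policy carrying these values, assuming `vote-retries` and `vote-retry-wait` are direct children of `backup`, mirroring the classic configuration:
```xml
<ha-policy>
   <replication>
      <backup>
         <manager>
            <properties>
               <property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
            </properties>
         </manager>
         <!-- 12 attempts * 2000 ms = 24000 ms > session-ms (18000 ms) -->
         <vote-retries>11</vote-retries>
         <vote-retry-wait>2000</vote-retry-wait>
         <allow-failback>true</allow-failback>
      </backup>
   </replication>
</ha-policy>
```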
### Shared Store
When using a shared store, both live and backup servers share the *same*
@ -406,8 +606,32 @@ stop. This configuration would look like:
</ha-policy>
```
In replication HA mode you need to set an extra property
`check-for-live-server` to `true` in the `master` configuration. If set
The same configuration option can be set for both kinds of replication. Classic:
```xml
<ha-policy>
<replication>
<slave>
<allow-failback>true</allow-failback>
</slave>
</replication>
</ha-policy>
```
and with pluggable quorum provider:
```xml
<ha-policy>
<replication>
<manager>
<!-- some meaningful configuration -->
</manager>
<backup>
<allow-failback>true</allow-failback>
</backup>
</replication>
</ha-policy>
```
In both replication HA modes you need to set an extra property
`check-for-live-server` to `true` in the `master`/`primary` configuration. If set
to true, during start-up a live server will first search the cluster for
another server using its nodeID. If it finds one, it will contact this
server and try to "fail-back". Since this is a remote replication
@ -418,7 +642,7 @@ to shutdown for it to take over. This is necessary because otherwise the
live server has no means to know whether there was a fail-over or not,
and if there was if the server that took its duties is still running or
not. To configure this option at your `broker.xml`
configuration file as follows:
configuration file as follows, for classic replication:
```xml
<ha-policy>
@ -430,6 +654,29 @@ configuration file as follows:
</ha-policy>
```
And pluggable quorum replication:
```xml
<ha-policy>
<replication>
<manager>
<!-- some meaningful configuration -->
</manager>
<primary>
<check-for-live-server>true</check-for-live-server>
</primary>
</replication>
</ha-policy>
```
The key difference from classic replication is that if `master` cannot reach any
live server with the same nodeID, it goes straight to becoming live, while `primary`
requests it from the quorum provider, searching again for any existing live broker if
the quorum provider is not available (e.g. connectivity loss, consensus absence) or
if there's another live broker with the same nodeID alive, in an endless loop.
In short: a started `primary` cannot become live without consensus.
> **Warning**
>
> Be aware that if you restart a live server while after failover has

31
pom.xml
View File

@ -64,6 +64,8 @@
<module>artemis-distribution</module>
<module>tests</module>
<module>artemis-features</module>
<module>artemis-quorum-api</module>
<module>artemis-quorum-ri</module>
</modules>
<name>ActiveMQ Artemis Parent</name>
@ -105,6 +107,9 @@
<mockito.version>3.11.2</mockito.version>
<jctools.version>2.1.2</jctools.version>
<netty.version>4.1.66.Final</netty.version>
<curator.version>5.1.0</curator.version>
<!-- While waiting https://issues.apache.org/jira/browse/CURATOR-595 fix -->
<zookeeper.version>3.6.3</zookeeper.version>
<!-- this is basically for tests -->
<netty-tcnative-version>2.0.40.Final</netty-tcnative-version>
@ -851,6 +856,32 @@
<artifactId>jakarta.security.auth.message-api</artifactId>
<version>${jakarta.security.auth.message-api.version}</version>
</dependency>
<!-- Curator Zookeeper RI -->
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>${curator.version}</version>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-framework</artifactId>
<version>${curator.version}</version>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-client</artifactId>
<version>${curator.version}</version>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>${zookeeper.version}</version>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper-jute</artifactId>
<version>${zookeeper.version}</version>
</dependency>
</dependencies>
</dependencyManagement>

View File

@ -44,9 +44,9 @@ public class ScaleDownFailoverTest extends ClusterTestBase {
public void setUp() throws Exception {
super.setUp();
stopCount = 0;
setupLiveServer(0, isFileStorage(), false, isNetty(), true);
setupLiveServer(1, isFileStorage(), false, isNetty(), true);
setupLiveServer(2, isFileStorage(), false, isNetty(), true);
setupLiveServer(0, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
setupLiveServer(1, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
setupLiveServer(2, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
ScaleDownConfiguration scaleDownConfiguration = new ScaleDownConfiguration();
ScaleDownConfiguration scaleDownConfiguration2 = new ScaleDownConfiguration();
scaleDownConfiguration2.setEnabled(false);

View File

@ -35,8 +35,8 @@ public class ScaleDownFailureTest extends ClusterTestBase {
@Before
public void setUp() throws Exception {
super.setUp();
setupLiveServer(0, isFileStorage(), false, isNetty(), true);
setupLiveServer(1, isFileStorage(), false, isNetty(), true);
setupLiveServer(0, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
setupLiveServer(1, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
if (isGrouped()) {
ScaleDownConfiguration scaleDownConfiguration = new ScaleDownConfiguration();
scaleDownConfiguration.setGroupName("bill");

View File

@ -51,6 +51,12 @@
<scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>artemis-quorum-ri</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.activemq.tests</groupId>
<artifactId>unit-tests</artifactId>

View File

@ -93,12 +93,11 @@ public class InfiniteRedeliveryTest extends ActiveMQTestBase {
backupConfig = createDefaultConfig(0, true);
liveConfig = createDefaultConfig(0, true);
ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null);
configureReplicationPair(backupConnector, backupAcceptor, liveConnector);
backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
nodeManager = new InVMNodeManager(true, backupConfig.getJournalLocation());
@ -109,6 +108,14 @@ public class InfiniteRedeliveryTest extends ActiveMQTestBase {
liveServer = createTestableServer(liveConfig, nodeManager);
}
protected void configureReplicationPair(TransportConfiguration backupConnector,
TransportConfiguration backupAcceptor,
TransportConfiguration liveConnector) {
ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
}
@Before
@Override

View File

@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.client;
import java.util.Collections;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
import org.apache.activemq.artemis.tests.util.ReplicatedBackupUtils;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
public class PluggableQuorumInfiniteRedeliveryTest extends InfiniteRedeliveryTest {
@Rule
public TemporaryFolder tmpFolder = new TemporaryFolder();
private DistributedPrimitiveManagerConfiguration managerConfiguration;
public PluggableQuorumInfiniteRedeliveryTest(String protocol, boolean useCLI) {
super(protocol, useCLI);
}
@Before
@Override
public void setUp() throws Exception {
super.setUp();
this.managerConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(),
Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString()));
}
@Override
protected void configureReplicationPair(TransportConfiguration backupConnector,
TransportConfiguration backupAcceptor,
TransportConfiguration liveConnector) {
ReplicatedBackupUtils.configurePluggableQuorumReplicationPair(backupConfig, backupConnector, backupAcceptor,
liveConfig, liveConnector, null,
managerConfiguration, managerConfiguration);
((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration())
.setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true);
}
}

View File

@ -17,6 +17,7 @@
package org.apache.activemq.artemis.tests.integration.cluster.distribution;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.URI;
@ -56,9 +57,12 @@ import org.apache.activemq.artemis.core.client.impl.TopologyMemberImpl;
import org.apache.activemq.artemis.core.config.ClusterConnectionConfiguration;
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.LiveOnlyPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.SharedStoreMasterPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration;
import org.apache.activemq.artemis.core.postoffice.Binding;
@ -85,6 +89,7 @@ import org.apache.activemq.artemis.core.server.group.GroupingHandler;
import org.apache.activemq.artemis.core.server.group.impl.GroupingHandlerConfiguration;
import org.apache.activemq.artemis.core.server.impl.AddressInfo;
import org.apache.activemq.artemis.core.server.impl.InVMNodeManager;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
import org.apache.activemq.artemis.utils.PortCheckRule;
import org.jboss.logging.Logger;
@ -92,9 +97,14 @@ import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
public abstract class ClusterTestBase extends ActiveMQTestBase {
@Rule
public TemporaryFolder tmpFolder = new TemporaryFolder();
private static final Logger log = Logger.getLogger(ClusterTestBase.class);
private static final int[] PORTS = {TransportConstants.DEFAULT_PORT, TransportConstants.DEFAULT_PORT + 1, TransportConstants.DEFAULT_PORT + 2, TransportConstants.DEFAULT_PORT + 3, TransportConstants.DEFAULT_PORT + 4, TransportConstants.DEFAULT_PORT + 5, TransportConstants.DEFAULT_PORT + 6, TransportConstants.DEFAULT_PORT + 7, TransportConstants.DEFAULT_PORT + 8, TransportConstants.DEFAULT_PORT + 9,};
@ -134,6 +144,21 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
return true;
}
private DistributedPrimitiveManagerConfiguration pluggableQuorumConfiguration = null;
private DistributedPrimitiveManagerConfiguration getOrCreatePluggableQuorumConfiguration() {
if (pluggableQuorumConfiguration != null) {
return pluggableQuorumConfiguration;
}
try {
pluggableQuorumConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString()));
} catch (IOException ioException) {
log.error(ioException);
return null;
}
return pluggableQuorumConfiguration;
}
@Override
@Before
public void setUp() throws Exception {
@ -159,11 +184,19 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
}
public enum HAType {
SharedStore, SharedNothingReplication, PluggableQuorumReplication
}
protected HAType haType() {
return HAType.SharedNothingReplication;
}
/**
* Whether the servers share the storage or not.
*/
protected boolean isSharedStore() {
return false;
protected final boolean isSharedStore() {
return HAType.SharedStore.equals(haType());
}
@Override
@ -1481,14 +1514,14 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
}
protected void setupServer(final int node, final boolean fileStorage, final boolean netty) throws Exception {
setupLiveServer(node, fileStorage, false, netty, false);
setupLiveServer(node, fileStorage, HAType.SharedNothingReplication, netty, false);
}
protected void setupLiveServer(final int node,
final boolean fileStorage,
final boolean netty,
boolean isLive) throws Exception {
setupLiveServer(node, fileStorage, false, netty, isLive);
setupLiveServer(node, fileStorage, HAType.SharedNothingReplication, netty, isLive);
}
protected boolean isResolveProtocols() {
@ -1497,27 +1530,26 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
protected void setupLiveServer(final int node,
final boolean fileStorage,
final boolean sharedStorage,
final HAType haType,
final boolean netty,
boolean liveOnly) throws Exception {
if (servers[node] != null) {
throw new IllegalArgumentException("Already a server at node " + node);
}
HAPolicyConfiguration haPolicyConfiguration = null;
final HAPolicyConfiguration haPolicyConfiguration;
if (liveOnly) {
haPolicyConfiguration = new LiveOnlyPolicyConfiguration();
} else {
if (sharedStorage)
haPolicyConfiguration = new SharedStoreMasterPolicyConfiguration();
else
haPolicyConfiguration = new ReplicatedPolicyConfiguration();
haPolicyConfiguration = haPolicyLiveConfiguration(haType);
}
Configuration configuration = createBasicConfig(node).setJournalMaxIO_AIO(1000).setThreadPoolMaxSize(10).clearAcceptorConfigurations().addAcceptorConfiguration(createTransportConfiguration(netty, true, generateParams(node, netty))).setHAPolicyConfiguration(haPolicyConfiguration).setResolveProtocols(isResolveProtocols());
ActiveMQServer server;
final boolean sharedStorage = HAType.SharedStore.equals(haType);
if (fileStorage) {
if (sharedStorage) {
server = createInVMFailoverServer(true, configuration, nodeManagers[node], node);
@ -1538,6 +1570,20 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
servers[node] = addServer(server);
}
private HAPolicyConfiguration haPolicyLiveConfiguration(HAType haType) {
switch (haType) {
case SharedStore:
return new SharedStoreMasterPolicyConfiguration();
case SharedNothingReplication:
return new ReplicatedPolicyConfiguration();
case PluggableQuorumReplication:
return ReplicationPrimaryPolicyConfiguration.withDefault()
.setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration());
default:
throw new AssertionError("Unsupported haType = " + haType);
}
}
/**
* Server lacks a {@link ClusterConnectionConfiguration} necessary for the remote (replicating)
* backup case.
@ -1549,14 +1595,14 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
* @param node
* @param liveNode
* @param fileStorage
* @param sharedStorage
* @param haType
* @param netty
* @throws Exception
*/
protected void setupBackupServer(final int node,
final int liveNode,
final boolean fileStorage,
final boolean sharedStorage,
final HAType haType,
final boolean netty) throws Exception {
if (servers[node] != null) {
throw new IllegalArgumentException("Already a server at node " + node);
@ -1566,7 +1612,9 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
TransportConfiguration backupConfig = createTransportConfiguration(netty, false, generateParams(node, netty));
TransportConfiguration acceptorConfig = createTransportConfiguration(netty, true, generateParams(node, netty));
Configuration configuration = createBasicConfig(sharedStorage ? liveNode : node).clearAcceptorConfigurations().addAcceptorConfiguration(acceptorConfig).addConnectorConfiguration(liveConfig.getName(), liveConfig).addConnectorConfiguration(backupConfig.getName(), backupConfig).setHAPolicyConfiguration(sharedStorage ? new SharedStoreSlavePolicyConfiguration() : new ReplicaPolicyConfiguration());
final boolean sharedStorage = HAType.SharedStore.equals(haType);
Configuration configuration = createBasicConfig(sharedStorage ? liveNode : node).clearAcceptorConfigurations().addAcceptorConfiguration(acceptorConfig).addConnectorConfiguration(liveConfig.getName(), liveConfig).addConnectorConfiguration(backupConfig.getName(), backupConfig).setHAPolicyConfiguration(haPolicyBackupConfiguration(haType));
ActiveMQServer server;
@ -1580,6 +1628,21 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
servers[node] = addServer(server);
}
private HAPolicyConfiguration haPolicyBackupConfiguration(HAType haType) {
switch (haType) {
case SharedStore:
return new SharedStoreSlavePolicyConfiguration();
case SharedNothingReplication:
return new ReplicaPolicyConfiguration();
case PluggableQuorumReplication:
return ReplicationBackupPolicyConfiguration.withDefault()
.setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration());
default:
throw new AssertionError("Unsupported ha type = " + haType);
}
}
protected void setupLiveServerWithDiscovery(final int node,
final String groupAddress,
final int port,

View File

@ -87,14 +87,14 @@ public class ClusterWithBackupTest extends ClusterTestBase {
protected void setupServers() throws Exception {
// The backups
setupBackupServer(0, 3, isFileStorage(), true, isNetty());
setupBackupServer(1, 4, isFileStorage(), true, isNetty());
setupBackupServer(2, 5, isFileStorage(), true, isNetty());
setupBackupServer(0, 3, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(1, 4, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(2, 5, isFileStorage(), HAType.SharedStore, isNetty());
// The lives
setupLiveServer(3, isFileStorage(), true, isNetty(), false);
setupLiveServer(4, isFileStorage(), true, isNetty(), false);
setupLiveServer(5, isFileStorage(), true, isNetty(), false);
setupLiveServer(3, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(4, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(5, isFileStorage(), HAType.SharedStore, isNetty(), false);
}
}

View File

@ -46,14 +46,14 @@ public class SimpleSymmetricClusterTest extends ClusterTestBase {
@Test
public void testSimpleWithBackup() throws Exception {
// The backups
setupBackupServer(0, 3, isFileStorage(), true, isNetty());
setupBackupServer(1, 4, isFileStorage(), true, isNetty());
setupBackupServer(2, 5, isFileStorage(), true, isNetty());
setupBackupServer(0, 3, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(1, 4, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(2, 5, isFileStorage(), HAType.SharedStore, isNetty());
// The lives
setupLiveServer(3, isFileStorage(), true, isNetty(), false);
setupLiveServer(4, isFileStorage(), true, isNetty(), false);
setupLiveServer(5, isFileStorage(), true, isNetty(), false);
setupLiveServer(3, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(4, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(5, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupClusterConnection("cluster0", "queues", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 3, 4, 5);

View File

@ -453,18 +453,18 @@ public class SymmetricClusterWithBackupTest extends SymmetricClusterTest {
@Override
protected void setupServers() throws Exception {
// The backups
setupBackupServer(5, 0, isFileStorage(), true, isNetty());
setupBackupServer(6, 1, isFileStorage(), true, isNetty());
setupBackupServer(7, 2, isFileStorage(), true, isNetty());
setupBackupServer(8, 3, isFileStorage(), true, isNetty());
setupBackupServer(9, 4, isFileStorage(), true, isNetty());
setupBackupServer(5, 0, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(6, 1, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(7, 2, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(8, 3, isFileStorage(), HAType.SharedStore, isNetty());
setupBackupServer(9, 4, isFileStorage(), HAType.SharedStore, isNetty());
// The lives
setupLiveServer(0, isFileStorage(), true, isNetty(), false);
setupLiveServer(1, isFileStorage(), true, isNetty(), false);
setupLiveServer(2, isFileStorage(), true, isNetty(), false);
setupLiveServer(3, isFileStorage(), true, isNetty(), false);
setupLiveServer(4, isFileStorage(), true, isNetty(), false);
setupLiveServer(0, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(1, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(2, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(3, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(4, isFileStorage(), HAType.SharedStore, isNetty(), false);
}
@Override

View File

@ -60,6 +60,8 @@ import org.apache.activemq.artemis.core.server.cluster.ha.BackupPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicaPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicatedPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreMasterPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreSlavePolicy;
import org.apache.activemq.artemis.core.server.files.FileMoveManager;
@ -786,7 +788,7 @@ public class FailoverTest extends FailoverTestBase {
((ReplicaPolicy) haPolicy).setMaxSavedReplicatedJournalsSize(1);
}
simpleFailover(haPolicy instanceof ReplicaPolicy, doFailBack);
simpleFailover(haPolicy instanceof ReplicaPolicy || haPolicy instanceof ReplicationBackupPolicy, doFailBack);
}
@Test(timeout = 120000)
@ -816,7 +818,9 @@ public class FailoverTest extends FailoverTestBase {
Thread.sleep(100);
Assert.assertFalse("backup is not running", backupServer.isStarted());
Assert.assertFalse("must NOT be a backup", liveServer.getServer().getHAPolicy() instanceof BackupPolicy);
final boolean isBackup = liveServer.getServer().getHAPolicy() instanceof BackupPolicy ||
liveServer.getServer().getHAPolicy() instanceof ReplicationBackupPolicy;
Assert.assertFalse("must NOT be a backup", isBackup);
adaptLiveConfigForReplicatedFailBack(liveServer);
beforeRestart(liveServer);
liveServer.start();
@ -827,7 +831,8 @@ public class FailoverTest extends FailoverTestBase {
ClientSession session2 = createSession(sf, false, false);
session2.start();
ClientConsumer consumer2 = session2.createConsumer(FailoverTestBase.ADDRESS);
boolean replication = liveServer.getServer().getHAPolicy() instanceof ReplicatedPolicy;
final boolean replication = liveServer.getServer().getHAPolicy() instanceof ReplicatedPolicy ||
liveServer.getServer().getHAPolicy() instanceof ReplicationPrimaryPolicy;
if (replication)
receiveMessages(consumer2, 0, NUM_MESSAGES, true);
assertNoMoreMessages(consumer2);
@ -838,7 +843,7 @@ public class FailoverTest extends FailoverTestBase {
public void testSimpleFailover() throws Exception {
HAPolicy haPolicy = backupServer.getServer().getHAPolicy();
simpleFailover(haPolicy instanceof ReplicaPolicy, false);
simpleFailover(haPolicy instanceof ReplicaPolicy || haPolicy instanceof ReplicationBackupPolicy, false);
}
@Test(timeout = 120000)

View File

@ -19,6 +19,7 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover;
import java.io.IOException;
import java.net.ServerSocket;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
@ -36,15 +37,19 @@ import org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal
import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal;
import org.apache.activemq.artemis.core.config.ClusterConnectionConfiguration;
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.SharedStoreMasterPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration;
import org.apache.activemq.artemis.core.remoting.impl.invm.InVMConnector;
import org.apache.activemq.artemis.core.remoting.impl.invm.InVMRegistry;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicatedPolicy;
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.InVMNodeManager;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
import org.apache.activemq.artemis.tests.integration.cluster.util.SameProcessActiveMQServer;
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
@ -52,9 +57,13 @@ import org.apache.activemq.artemis.tests.util.ReplicatedBackupUtils;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
public abstract class FailoverTestBase extends ActiveMQTestBase {
// Constants -----------------------------------------------------
@Rule
public TemporaryFolder tmpFolder = new TemporaryFolder();
protected static final SimpleString ADDRESS = new SimpleString("FailoverTestAddress");
@ -216,7 +225,34 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
}
}
protected void createPluggableReplicatedConfigs() throws Exception {
final TransportConfiguration liveConnector = getConnectorTransportConfiguration(true);
final TransportConfiguration backupConnector = getConnectorTransportConfiguration(false);
final TransportConfiguration backupAcceptor = getAcceptorTransportConfiguration(false);
backupConfig = createDefaultInVMConfig();
liveConfig = createDefaultInVMConfig();
DistributedPrimitiveManagerConfiguration managerConfiguration =
new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(),
Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString()));
ReplicatedBackupUtils.configurePluggableQuorumReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null, managerConfiguration, managerConfiguration);
backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false);
setupHAPolicyConfiguration();
nodeManager = createReplicatedBackupNodeManager(backupConfig);
backupServer = createTestableServer(backupConfig);
liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true));
liveServer = createTestableServer(liveConfig);
}
protected void setupHAPolicyConfiguration() {
Assert.assertTrue(backupConfig.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
}
@ -233,8 +269,13 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
configuration.getConnectorConfigurations().put(backupConnector.getName(), backupConnector);
return;
}
ReplicatedPolicy haPolicy = (ReplicatedPolicy) server.getServer().getHAPolicy();
haPolicy.setCheckForLiveServer(true);
HAPolicy policy = server.getServer().getHAPolicy();
if (policy instanceof ReplicatedPolicy) {
((ReplicatedPolicy) policy).setCheckForLiveServer(true);
} else if (policy instanceof ReplicationPrimaryPolicy) {
Assert.assertTrue("Adapting won't work for the current configuration", ((ReplicationPrimaryPolicy) policy).isCheckForLiveServer());
}
}
@Override

View File

@ -19,8 +19,8 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover;
public class GroupingFailoverReplicationTest extends GroupingFailoverTestBase {
@Override
protected boolean isSharedStore() {
return false;
protected HAType haType() {
return HAType.SharedNothingReplication;
}
}

View File

@ -19,7 +19,7 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover;
public class GroupingFailoverSharedServerTest extends GroupingFailoverTestBase {
@Override
protected boolean isSharedStore() {
return true;
protected HAType haType() {
return HAType.SharedStore;
}
}

View File

@ -26,22 +26,26 @@ import org.apache.activemq.artemis.api.core.client.ServerLocator;
import org.apache.activemq.artemis.core.client.impl.TopologyMemberImpl;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.server.cluster.impl.MessageLoadBalancingType;
import org.apache.activemq.artemis.core.server.group.impl.GroupingHandlerConfiguration;
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
import org.apache.activemq.artemis.tests.integration.cluster.distribution.ClusterTestBase;
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
import org.apache.activemq.artemis.utils.Wait;
import org.junit.Test;
public abstract class GroupingFailoverTestBase extends ClusterTestBase {
@Test
public void testGroupingLocalHandlerFails() throws Exception {
setupBackupServer(2, 0, isFileStorage(), isSharedStore(), isNetty());
setupBackupServer(2, 0, isFileStorage(), haType(), isNetty());
setupLiveServer(0, isFileStorage(), isSharedStore(), isNetty(), false);
setupLiveServer(0, isFileStorage(), haType(), isNetty(), false);
setupLiveServer(1, isFileStorage(), isSharedStore(), isNetty(), false);
setupLiveServer(1, isFileStorage(), haType(), isNetty(), false);
setupClusterConnection("cluster0", "queues", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 0, 1);
@ -54,10 +58,18 @@ public abstract class GroupingFailoverTestBase extends ClusterTestBase {
setUpGroupHandler(GroupingHandlerConfiguration.TYPE.REMOTE, 1);
setUpGroupHandler(GroupingHandlerConfiguration.TYPE.LOCAL, 2);
if (!isSharedStore()) {
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
switch (haType()) {
case SharedNothingReplication:
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
break;
case PluggableQuorumReplication:
((ReplicationPrimaryPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
((ReplicationPrimaryPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
((ReplicationBackupPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
break;
}
startServers(0, 1, 2);
@ -129,11 +141,11 @@ public abstract class GroupingFailoverTestBase extends ClusterTestBase {
@Test
public void testGroupingLocalHandlerFailsMultipleGroups() throws Exception {
setupBackupServer(2, 0, isFileStorage(), isSharedStore(), isNetty());
setupBackupServer(2, 0, isFileStorage(), haType(), isNetty());
setupLiveServer(0, isFileStorage(), isSharedStore(), isNetty(), false);
setupLiveServer(0, isFileStorage(), haType(), isNetty(), false);
setupLiveServer(1, isFileStorage(), isSharedStore(), isNetty(), false);
setupLiveServer(1, isFileStorage(), haType(), isNetty(), false);
setupClusterConnection("cluster0", "queues", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 0, 1);
@ -147,10 +159,18 @@ public abstract class GroupingFailoverTestBase extends ClusterTestBase {
setUpGroupHandler(GroupingHandlerConfiguration.TYPE.LOCAL, 2);
if (!isSharedStore()) {
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
switch (haType()) {
case SharedNothingReplication:
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
break;
case PluggableQuorumReplication:
((ReplicationPrimaryPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
((ReplicationPrimaryPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
((ReplicationBackupPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
break;
}
startServers(0, 1, 2);
@ -187,9 +207,17 @@ public abstract class GroupingFailoverTestBase extends ClusterTestBase {
verifyReceiveAllWithGroupIDRoundRobin(0, 30, 0, 1);
if (!isSharedStore()) {
SharedNothingBackupActivation backupActivation = (SharedNothingBackupActivation) servers[2].getActivation();
assertTrue(backupActivation.waitForBackupSync(10, TimeUnit.SECONDS));
switch (haType()) {
case SharedNothingReplication: {
SharedNothingBackupActivation backupActivation = (SharedNothingBackupActivation) servers[2].getActivation();
assertTrue(backupActivation.waitForBackupSync(10, TimeUnit.SECONDS));
}
break;
case PluggableQuorumReplication: {
ReplicationBackupActivation backupActivation = (ReplicationBackupActivation) servers[2].getActivation();
Wait.assertTrue(backupActivation::isReplicaSync, TimeUnit.SECONDS.toMillis(10));
}
break;
}
closeSessionFactory(0);

View File

@ -49,14 +49,14 @@ public class LiveVoteOnBackupFailureClusterTest extends ClusterWithBackupFailove
@Override
protected void setupServers() throws Exception {
// The backups
setupBackupServer(3, 0, isFileStorage(), isSharedStorage(), isNetty());
setupBackupServer(4, 1, isFileStorage(), isSharedStorage(), isNetty());
setupBackupServer(5, 2, isFileStorage(), isSharedStorage(), isNetty());
setupBackupServer(3, 0, isFileStorage(), haType(), isNetty());
setupBackupServer(4, 1, isFileStorage(), haType(), isNetty());
setupBackupServer(5, 2, isFileStorage(), haType(), isNetty());
// The lives
setupLiveServer(0, isFileStorage(), isSharedStorage(), isNetty(), false);
setupLiveServer(1, isFileStorage(), isSharedStorage(), isNetty(), false);
setupLiveServer(2, isFileStorage(), isSharedStorage(), isNetty(), false);
setupLiveServer(0, isFileStorage(), haType(), isNetty(), false);
setupLiveServer(1, isFileStorage(), haType(), isNetty(), false);
setupLiveServer(2, isFileStorage(), haType(), isNetty(), false);
//we need to know who is connected to who
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group0");
@ -71,9 +71,9 @@ public class LiveVoteOnBackupFailureClusterTest extends ClusterWithBackupFailove
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setVoteOnReplicationFailure(true);
((ReplicatedPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setVoteOnReplicationFailure(true);
}
protected boolean isSharedStorage() {
return false;
@Override
protected HAType haType() {
return HAType.SharedNothingReplication;
}
@Test

View File

@ -16,7 +16,9 @@
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.activemq.artemis.api.core.SimpleString;
@ -27,22 +29,47 @@ import org.apache.activemq.artemis.api.core.client.ClientSessionFactory;
import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal;
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.SharedStoreMasterPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.core.server.Queue;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
import org.apache.activemq.artemis.tests.integration.cluster.distribution.ClusterTestBase;
import org.apache.activemq.artemis.tests.util.Wait;
import org.apache.activemq.artemis.tests.integration.cluster.util.SameProcessActiveMQServer;
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
import org.apache.activemq.artemis.tests.util.TransportConfigurationUtils;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
@Rule
public TemporaryFolder tmpFolder = new TemporaryFolder();
private DistributedPrimitiveManagerConfiguration pluggableQuorumConfiguration = null;
private DistributedPrimitiveManagerConfiguration getOrCreatePluggableQuorumConfiguration() {
if (pluggableQuorumConfiguration != null) {
return pluggableQuorumConfiguration;
}
try {
pluggableQuorumConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString()));
} catch (IOException ioException) {
return null;
}
return pluggableQuorumConfiguration;
}
// Constants -----------------------------------------------------
// TODO: find a better solution for this
@ -67,7 +94,15 @@ public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
public abstract boolean isNetty();
public abstract boolean isSharedStore();
public enum HAType {
SharedStore, SharedNothingReplication, PluggableQuorumReplication
}
public abstract HAType haType();
protected final boolean isSharedStore() {
return ClusterTestBase.HAType.SharedStore.equals(haType());
}
public abstract String getNodeGroupName();
@ -82,14 +117,22 @@ public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
for (int i = 0; i < getLiveServerCount(); i++) {
HAPolicyConfiguration haPolicyConfiguration = null;
switch (haType()) {
if (isSharedStore()) {
haPolicyConfiguration = new SharedStoreMasterPolicyConfiguration();
} else {
haPolicyConfiguration = new ReplicatedPolicyConfiguration();
if (getNodeGroupName() != null) {
((ReplicatedPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i);
}
case SharedStore:
haPolicyConfiguration = new SharedStoreMasterPolicyConfiguration();
break;
case SharedNothingReplication:
haPolicyConfiguration = new ReplicatedPolicyConfiguration();
if (getNodeGroupName() != null) {
((ReplicatedPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i);
}
break;
case PluggableQuorumReplication:
haPolicyConfiguration = ReplicationPrimaryPolicyConfiguration.withDefault()
.setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration())
.setGroupName(getNodeGroupName() != null ? (getNodeGroupName() + "-" + i) : null);
break;
}
Configuration configuration = createDefaultConfig(isNetty()).clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true, i)).setHAPolicyConfiguration(haPolicyConfiguration);
@ -126,13 +169,24 @@ public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
for (int i = 0; i < getBackupServerCount(); i++) {
HAPolicyConfiguration haPolicyConfiguration = null;
if (isSharedStore()) {
haPolicyConfiguration = new SharedStoreSlavePolicyConfiguration();
} else {
haPolicyConfiguration = new ReplicaPolicyConfiguration();
if (getNodeGroupName() != null) {
((ReplicaPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i);
}
switch (haType()) {
case SharedStore:
haPolicyConfiguration = new SharedStoreSlavePolicyConfiguration();
break;
case SharedNothingReplication:
haPolicyConfiguration = new ReplicaPolicyConfiguration();
if (getNodeGroupName() != null) {
((ReplicaPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i);
}
break;
case PluggableQuorumReplication:
haPolicyConfiguration = ReplicationBackupPolicyConfiguration.withDefault()
.setVoteRetries(1)
.setVoteRetryWait(1000)
.setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration())
.setGroupName(getNodeGroupName() != null ? (getNodeGroupName() + "-" + i) : null);
break;
}
Configuration configuration = createDefaultConfig(isNetty()).clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(false, i)).setHAPolicyConfiguration(haPolicyConfiguration);

View File

@ -103,7 +103,7 @@ public class NettyReplicationStopTest extends FailoverTestBase {
final int numMessages = 10;
ReplicationEndpoint endpoint = backupServer.getServer().getReplicationEndpoint();
ReplicationEndpoint endpoint = getReplicationEndpoint(backupServer.getServer());
endpoint.pause();

View File

@ -124,14 +124,14 @@ public class NetworkIsolationTest extends FailoverTestBase {
liveServer.start();
for (int i = 0; i < 1000 && backupServer.getServer().getReplicationEndpoint() != null && !backupServer.getServer().getReplicationEndpoint().isStarted(); i++) {
for (int i = 0; i < 1000 && getReplicationEndpoint(backupServer.getServer()) != null && !getReplicationEndpoint(backupServer.getServer()).isStarted(); i++) {
Thread.sleep(10);
}
backupServer.getServer().getNetworkHealthCheck().clearAddresses();
// This will make sure the backup got synchronized after the network was activated again
Wait.assertTrue(() -> backupServer.getServer().getReplicationEndpoint().isStarted());
Assert.assertTrue(getReplicationEndpoint(backupServer.getServer()).isStarted());
} finally {
AssertionLoggerHandler.stopCapture();
}

View File

@ -17,12 +17,10 @@
package org.apache.activemq.artemis.tests.integration.cluster.failover;
import java.io.IOException;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.api.core.Interceptor;
import org.apache.activemq.artemis.api.core.QueueConfiguration;
import org.apache.activemq.artemis.api.core.SimpleString;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
@ -34,16 +32,18 @@ import org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration;
import org.apache.activemq.artemis.core.protocol.core.Packet;
import org.apache.activemq.artemis.core.protocol.core.impl.PacketImpl;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.server.NodeManager;
import org.apache.activemq.artemis.core.server.impl.Activation;
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
import org.apache.activemq.artemis.core.server.impl.InVMNodeManager;
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
import org.apache.activemq.artemis.tests.util.Wait;
import org.apache.activemq.artemis.logs.AssertionLoggerHandler;
import org.apache.activemq.artemis.spi.core.protocol.RemotingConnection;
import org.apache.activemq.artemis.tests.integration.cluster.util.SameProcessActiveMQServer;
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
@ -77,7 +77,9 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
}
protected TestableServer createTestableServer(Configuration config, NodeManager nodeManager) throws Exception {
boolean isBackup = config.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration || config.getHAPolicyConfiguration() instanceof SharedStoreSlavePolicyConfiguration;
boolean isBackup = config.getHAPolicyConfiguration() instanceof ReplicationBackupPolicyConfiguration ||
config.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration ||
config.getHAPolicyConfiguration() instanceof SharedStoreSlavePolicyConfiguration;
return new SameProcessActiveMQServer(createInVMFailoverServer(true, config, nodeManager, isBackup ? 2 : 1));
}
@ -119,6 +121,19 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
liveServer.crash(true, true, sessions);
}
protected void configureReplicationPair(Configuration backupConfig,
Configuration liveConfig,
TransportConfiguration backupConnector,
TransportConfiguration backupAcceptor,
TransportConfiguration liveConnector) throws IOException {
ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null);
((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000);
((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
}
@Test//(timeout = 120000)
public void testFailbackTimeout() throws Exception {
AssertionLoggerHandler.startCapture();
@ -134,19 +149,13 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
Configuration backupConfig = createDefaultInVMConfig();
Configuration liveConfig = createDefaultInVMConfig();
ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null);
((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000);
configureReplicationPair(backupConfig, liveConfig, backupConnector, backupAcceptor, liveConnector);
backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).
setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false);
liveConfig.setBindingsDirectory(getBindingsDir(0, false)).setJournalDirectory(getJournalDir(0, false)).
setPagingDirectory(getPageDir(0, false)).setLargeMessagesDirectory(getLargeMessagesDir(0, false)).setSecurityEnabled(false);
((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
NodeManager nodeManager = createReplicatedBackupNodeManager(backupConfig);
backupServer = createTestableServer(backupConfig, nodeManager);
@ -155,8 +164,6 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
liveServer = createTestableServer(liveConfig, nodeManager);
AtomicBoolean ignoreIntercept = new AtomicBoolean(false);
final TestableServer theBackup = backupServer;
liveServer.start();
@ -174,23 +181,30 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
Wait.assertTrue(backupServer.getServer()::isActive);
ignoreIntercept.set(true);
((ActiveMQServerImpl) backupServer.getServer()).setAfterActivationCreated(new Runnable() {
@Override
public void run() {
//theBackup.getServer().getActivation()
SharedNothingBackupActivation activation = (SharedNothingBackupActivation) theBackup.getServer().getActivation();
activation.getReplicationEndpoint().addOutgoingInterceptorForReplication(new Interceptor() {
@Override
public boolean intercept(Packet packet, RemotingConnection connection) throws ActiveMQException {
if (ignoreIntercept.get() && packet.getType() == PacketImpl.REPLICATION_RESPONSE_V2) {
final Activation backupActivation = theBackup.getServer().getActivation();
if (backupActivation instanceof SharedNothingBackupActivation) {
SharedNothingBackupActivation activation = (SharedNothingBackupActivation) backupActivation;
ReplicationEndpoint repEnd = activation.getReplicationEndpoint();
repEnd.addOutgoingInterceptorForReplication((packet, connection) -> {
if (packet.getType() == PacketImpl.REPLICATION_RESPONSE_V2) {
return false;
}
return true;
}
});
});
} else if (backupActivation instanceof ReplicationBackupActivation) {
ReplicationBackupActivation activation = (ReplicationBackupActivation) backupActivation;
activation.spyReplicationEndpointCreation(replicationEndpoint -> {
replicationEndpoint.addOutgoingInterceptorForReplication((packet, connection) -> {
if (packet.getType() == PacketImpl.REPLICATION_RESPONSE_V2) {
return false;
}
return true;
});
});
}
}
});
@ -198,7 +212,9 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
Assert.assertTrue(Wait.waitFor(() -> AssertionLoggerHandler.findText("AMQ229114")));
Wait.assertFalse(liveServer.getServer()::isStarted);
if (expectLiveSuicide()) {
Wait.assertFalse(liveServer.getServer()::isStarted);
}
} finally {
if (sf != null) {
@ -218,4 +234,8 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
}
}
protected boolean expectLiveSuicide() {
return true;
}
}

View File

@ -178,9 +178,9 @@ public class ReplicatedDistributionTest extends ClusterTestBase {
public void setUp() throws Exception {
super.setUp();
setupLiveServer(1, true, isSharedStore(), true, false);
setupLiveServer(3, true, isSharedStore(), true, false);
setupBackupServer(2, 3, true, isSharedStore(), true);
setupLiveServer(1, true, haType(), true, false);
setupLiveServer(3, true, haType(), true, false);
setupBackupServer(2, 3, true, haType(), true);
final String address = ReplicatedDistributionTest.ADDRESS.toString();
// notice the abuse of the method call: '3' is not a backup for '1'
@ -210,7 +210,7 @@ public class ReplicatedDistributionTest extends ClusterTestBase {
}
@Override
protected boolean isSharedStore() {
return false;
protected HAType haType() {
return HAType.SharedNothingReplication;
}
}

View File

@ -29,6 +29,7 @@ import org.apache.activemq.artemis.api.core.client.ClientSessionFactory;
import org.apache.activemq.artemis.api.core.client.FailoverEventType;
import org.apache.activemq.artemis.api.core.client.ServerLocator;
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
import org.apache.activemq.artemis.tests.util.Wait;
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
@ -51,8 +52,16 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated
@Override
@Test
public void testStartLiveFirst() throws Exception {
((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
switch (haType()) {
case SharedNothingReplication:
((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
break;
case PluggableQuorumReplication:
((ReplicationBackupPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
((ReplicationBackupPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
break;
}
startServers(liveServers);
backupServers.get(0).start();
@ -85,8 +94,17 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated
@Override
@Test
public void testStartBackupFirst() throws Exception {
((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
switch (haType()) {
case SharedNothingReplication:
((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
break;
case PluggableQuorumReplication:
((ReplicationBackupPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
((ReplicationBackupPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
break;
}
startServers(backupServers);
startServers(liveServers);

View File

@ -16,6 +16,9 @@
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover;
import java.util.Arrays;
import java.util.Collection;
import org.apache.activemq.artemis.api.core.QueueConfiguration;
import org.apache.activemq.artemis.api.core.client.ClientConsumer;
import org.apache.activemq.artemis.api.core.client.ClientMessage;
@ -25,9 +28,20 @@ import org.apache.activemq.artemis.api.core.client.ClientSessionFactory;
import org.apache.activemq.artemis.api.core.client.ServerLocator;
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@RunWith(Parameterized.class)
public class ReplicatedMultipleServerFailoverTest extends MultipleServerFailoverTestBase {
@Parameterized.Parameter
public HAType haType;
@Parameterized.Parameters(name = "ha={0}")
public static Collection<Object[]> getParams() {
return Arrays.asList(new Object[][]{{HAType.SharedNothingReplication}, {HAType.PluggableQuorumReplication}});
}
@Test
public void testStartLiveFirst() throws Exception {
for (TestableServer liveServer : liveServers) {
@ -140,8 +154,8 @@ public class ReplicatedMultipleServerFailoverTest extends MultipleServerFailover
}
@Override
public boolean isSharedStore() {
return false;
public HAType haType() {
return haType;
}
@Override

View File

@ -19,7 +19,7 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover;
public class SharedStoreDistributionTest extends ReplicatedDistributionTest {
@Override
protected boolean isSharedStore() {
return true;
protected HAType haType() {
return HAType.SharedStore;
}
}

View File

@ -41,8 +41,8 @@ public class SharedStoreDontWaitForActivationTest extends ClusterTestBase {
// 1. configure 0 as backup of one to share the same node manager and file
// storage locations
setupBackupServer(0, 1, isFileStorage(), true, isNetty());
setupLiveServer(1, isFileStorage(), true, isNetty(), false);
setupBackupServer(0, 1, isFileStorage(), HAType.SharedStore, isNetty());
setupLiveServer(1, isFileStorage(), HAType.SharedStore, isNetty(), false);
// now reconfigure the HA policy for both servers to master with automatic
// failover and wait-for-activation disabled.

View File

@ -40,8 +40,8 @@ public class SharedStoreMetricsLeakTest extends ClusterTestBase {
}
private void setupServers() throws Exception {
setupLiveServer(0, isFileStorage(), true, isNetty(), false);
setupBackupServer(1, 0, isFileStorage(), true, isNetty());
setupLiveServer(0, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupBackupServer(1, 0, isFileStorage(), HAType.SharedStore, isNetty());
getServer(0).getConfiguration().setHAPolicyConfiguration(new SharedStoreMasterPolicyConfiguration().setFailoverOnServerShutdown(true));
getServer(0).getConfiguration().setMetricsConfiguration(new MetricsConfiguration().setJvmThread(false).setJvmGc(false).setJvmMemory(false).setPlugin(new SimpleMetricsPlugin().init(null)));

View File

@ -41,9 +41,9 @@ public class SharedStoreScaleDownBackupTest extends ClusterTestBase {
public void setUp() throws Exception {
super.setUp();
setupLiveServer(0, isFileStorage(), true, isNetty(), false);
setupLiveServer(1, isFileStorage(), true, isNetty(), false);
setupBackupServer(2, 0, isFileStorage(), true, isNetty());
setupLiveServer(0, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupLiveServer(1, isFileStorage(), HAType.SharedStore, isNetty(), false);
setupBackupServer(2, 0, isFileStorage(), HAType.SharedStore, isNetty());
setupClusterConnection("cluster0", "testAddress", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 0, 1);
setupClusterConnection("cluster1", "testAddress", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 1, 0);

View File

@ -42,13 +42,13 @@ public class StaticClusterWithBackupFailoverTest extends ClusterWithBackupFailov
@Override
protected void setupServers() throws Exception {
// The backups
setupBackupServer(3, 0, isFileStorage(), isSharedStorage(), isNetty());
setupBackupServer(4, 1, isFileStorage(), isSharedStorage(), isNetty());
setupBackupServer(5, 2, isFileStorage(), isSharedStorage(), isNetty());
setupBackupServer(3, 0, isFileStorage(), haType(), isNetty());
setupBackupServer(4, 1, isFileStorage(), haType(), isNetty());
setupBackupServer(5, 2, isFileStorage(), haType(), isNetty());
// The lives
setupLiveServer(0, isFileStorage(), isSharedStorage(), isNetty(), false);
setupLiveServer(1, isFileStorage(), isSharedStorage(), isNetty(), false);
setupLiveServer(2, isFileStorage(), isSharedStorage(), isNetty(), false);
setupLiveServer(0, isFileStorage(), haType(), isNetty(), false);
setupLiveServer(1, isFileStorage(), haType(), isNetty(), false);
setupLiveServer(2, isFileStorage(), haType(), isNetty(), false);
}
}

View File

@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;
import java.util.Arrays;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.api.core.Interceptor;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.protocol.core.Packet;
import org.apache.activemq.artemis.core.protocol.core.impl.PacketImpl;
import org.apache.activemq.artemis.spi.core.protocol.RemotingConnection;
import org.apache.activemq.artemis.tests.integration.cluster.failover.FailoverTestBase;
import org.apache.activemq.artemis.tests.integration.cluster.failover.FakeServiceComponent;
import org.apache.activemq.artemis.tests.util.TransportConfigurationUtils;
import org.apache.activemq.artemis.tests.util.Wait;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import static java.util.Arrays.asList;
@RunWith(Parameterized.class)
public class PluggableQuorumBackupAuthenticationTest extends FailoverTestBase {
private static CountDownLatch registrationStarted;
@Parameterized.Parameter
public boolean useNetty;
@Parameterized.Parameters(name = "useNetty={0}")
public static Iterable<Object[]> getParams() {
return asList(new Object[][]{{false}, {true}});
}
@Override
@Before
public void setUp() throws Exception {
startBackupServer = false;
registrationStarted = new CountDownLatch(1);
super.setUp();
}
@Test
public void testWrongPasswordSetting() throws Exception {
FakeServiceComponent fakeServiceComponent = new FakeServiceComponent("fake web server");
Wait.assertTrue(liveServer.getServer()::isActive);
waitForServerToStart(liveServer.getServer());
backupServer.start();
backupServer.getServer().addExternalComponent(fakeServiceComponent, true);
assertTrue(registrationStarted.await(5, TimeUnit.SECONDS));
/*
 * We can't intercept the message at the backup, so we intercept the registration message at the
 * live instead.
 */
Wait.waitFor(() -> !backupServer.isStarted());
assertFalse("backup should have stopped", backupServer.isStarted());
Wait.assertFalse(fakeServiceComponent::isStarted);
backupServer.stop();
liveServer.stop();
}
@Override
protected void createConfigs() throws Exception {
createPluggableReplicatedConfigs();
backupConfig.setClusterPassword("crocodile");
liveConfig.setIncomingInterceptorClassNames(Arrays.asList(NotifyingInterceptor.class.getName()));
backupConfig.setSecurityEnabled(true);
liveConfig.setSecurityEnabled(true);
}
@Override
protected void setupHAPolicyConfiguration() {
((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
}
@Override
protected TransportConfiguration getAcceptorTransportConfiguration(final boolean live) {
return useNetty ? getNettyAcceptorTransportConfiguration(live) :
TransportConfigurationUtils.getInVMAcceptor(live);
}
@Override
protected TransportConfiguration getConnectorTransportConfiguration(final boolean live) {
return useNetty ? getNettyConnectorTransportConfiguration(live) :
TransportConfigurationUtils.getInVMConnector(live);
}
public static final class NotifyingInterceptor implements Interceptor {
@Override
public boolean intercept(Packet packet, RemotingConnection connection) throws ActiveMQException {
if (packet.getType() == PacketImpl.BACKUP_REGISTRATION) {
registrationStarted.countDown();
} else if (packet.getType() == PacketImpl.CLUSTER_CONNECT) {
registrationStarted.countDown();
}
return true;
}
}
}

View File

@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.tests.integration.cluster.failover.BackupSyncJournalTest;
public class PluggableQuorumBackupSyncJournalTest extends BackupSyncJournalTest {
@Override
protected void createConfigs() throws Exception {
createPluggableReplicatedConfigs();
}
@Override
protected void setupHAPolicyConfiguration() {
((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration())
.setCheckForLiveServer(true);
((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration())
.setMaxSavedReplicatedJournalsSize(2)
.setAllowFailBack(true);
}
}

View File

@ -0,0 +1,104 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;
import java.util.Arrays;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
import org.apache.activemq.artemis.tests.integration.cluster.failover.FailoverTestBase;
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
import org.apache.activemq.artemis.tests.util.TransportConfigurationUtils;
import org.apache.activemq.artemis.tests.util.Wait;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@RunWith(Parameterized.class)
public class PluggableQuorumExtraBackupReplicatedFailoverTest extends FailoverTestBase {
private static final String GROUP_NAME = "foo";
@Parameterized.Parameter
public boolean useGroupName;
@Parameterized.Parameters(name = "useGroupName={0}")
public static Iterable<Object[]> getParams() {
return Arrays.asList(new Object[][]{{false}, {true}});
}
@Override
protected void createConfigs() throws Exception {
createPluggableReplicatedConfigs();
}
@Override
protected void setupHAPolicyConfiguration() {
if (useGroupName) {
((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setGroupName(GROUP_NAME);
((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setGroupName(GROUP_NAME);
}
}
@Override
protected TransportConfiguration getAcceptorTransportConfiguration(final boolean live) {
return TransportConfigurationUtils.getInVMAcceptor(live);
}
@Override
protected TransportConfiguration getConnectorTransportConfiguration(final boolean live) {
return TransportConfigurationUtils.getInVMConnector(live);
}
@Test
public void testExtraBackupReplicates() throws Exception {
Configuration secondBackupConfig = backupConfig.copy();
String secondBackupGroupName = ((ReplicationBackupPolicyConfiguration) secondBackupConfig.getHAPolicyConfiguration()).getGroupName();
Assert.assertEquals(((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).getGroupName(),
secondBackupGroupName);
if (useGroupName) {
Assert.assertEquals(GROUP_NAME, secondBackupGroupName);
} else {
Assert.assertNull(secondBackupGroupName);
}
TestableServer secondBackupServer = createTestableServer(secondBackupConfig);
secondBackupConfig.setBindingsDirectory(getBindingsDir(1, true))
.setJournalDirectory(getJournalDir(1, true))
.setPagingDirectory(getPageDir(1, true))
.setLargeMessagesDirectory(getLargeMessagesDir(1, true))
.setSecurityEnabled(false);
waitForRemoteBackupSynchronization(backupServer.getServer());
secondBackupServer.start();
Thread.sleep(5000);
backupServer.stop();
waitForSync(secondBackupServer.getServer());
waitForRemoteBackupSynchronization(secondBackupServer.getServer());
}
private void waitForSync(ActiveMQServer server) throws Exception {
Wait.waitFor(server::isReplicaSync);
}
}

View File

@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;
import org.apache.activemq.artemis.tests.integration.cluster.distribution.ClusterTestBase;
import org.apache.activemq.artemis.tests.integration.cluster.failover.GroupingFailoverTestBase;
public class PluggableQuorumGroupingFailoverReplicationTest extends GroupingFailoverTestBase {
@Override
protected ClusterTestBase.HAType haType() {
return HAType.PluggableQuorumReplication;
}
}

View File

@ -0,0 +1,213 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;
import java.io.IOException;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.concurrent.TimeUnit;
import com.sun.net.httpserver.HttpExchange;
import com.sun.net.httpserver.HttpHandler;
import com.sun.net.httpserver.HttpServer;
import org.apache.activemq.artemis.api.core.QueueConfiguration;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.api.core.client.ClientSession;
import org.apache.activemq.artemis.component.WebServerComponent;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
import org.apache.activemq.artemis.core.server.ServiceComponent;
import org.apache.activemq.artemis.dto.AppDTO;
import org.apache.activemq.artemis.dto.WebServerDTO;
import org.apache.activemq.artemis.tests.integration.cluster.failover.FailoverTest;
import org.apache.activemq.artemis.tests.util.Wait;
import org.junit.Assert;
import org.junit.Test;
public class PluggableQuorumNettyNoGroupNameReplicatedFailoverTest extends FailoverTest {
protected void beforeWaitForRemoteBackupSynchronization() {
}
private void waitForSync(ActiveMQServer server) throws Exception {
Wait.waitFor(server::isReplicaSync);
}
/**
* The default maxSavedReplicatedJournalsSize is 2, which means the backup will fall back to being a replica only twice;
* after that it is stopped permanently.
*/
@Test(timeout = 120000)
public void testReplicatedFailback() throws Exception {
try {
beforeWaitForRemoteBackupSynchronization();
waitForSync(backupServer.getServer());
createSessionFactory();
ClientSession session = createSession(sf, true, true);
session.createQueue(new QueueConfiguration(ADDRESS));
crash(session);
liveServer.start();
waitForSync(liveServer.getServer());
waitForSync(backupServer.getServer());
waitForServerToStart(liveServer.getServer());
session = createSession(sf, true, true);
crash(session);
liveServer.start();
waitForSync(liveServer.getServer());
waitForSync(backupServer.getServer());
waitForServerToStart(liveServer.getServer());
session = createSession(sf, true, true);
crash(session);
liveServer.start();
waitForSync(liveServer.getServer());
liveServer.getServer().waitForActivation(5, TimeUnit.SECONDS);
waitForSync(liveServer.getServer());
waitForServerToStart(backupServer.getServer());
assertTrue(backupServer.getServer().isStarted());
} finally {
if (sf != null) {
sf.close();
}
try {
liveServer.getServer().stop();
} catch (Throwable ignored) {
}
try {
backupServer.getServer().stop();
} catch (Throwable ignored) {
}
}
}
@Test
public void testReplicatedFailbackBackupFromLiveBackToBackup() throws Exception {
InetSocketAddress address = new InetSocketAddress("127.0.0.1", 8787);
HttpServer httpServer = HttpServer.create(address, 100);
httpServer.start();
try {
httpServer.createContext("/", new HttpHandler() {
@Override
public void handle(HttpExchange t) throws IOException {
String response = "<html><body><b>This is a unit test</b></body></html>";
t.sendResponseHeaders(200, response.length());
OutputStream os = t.getResponseBody();
os.write(response.getBytes());
os.close();
}
});
WebServerDTO wdto = new WebServerDTO();
AppDTO appDTO = new AppDTO();
appDTO.war = "console.war";
appDTO.url = "console";
wdto.apps = new ArrayList<AppDTO>();
wdto.apps.add(appDTO);
wdto.bind = "http://localhost:0";
wdto.path = "console";
WebServerComponent webServerComponent = new WebServerComponent();
webServerComponent.configure(wdto, ".", ".");
webServerComponent.start();
backupServer.getServer().getNetworkHealthCheck().parseURIList("http://localhost:8787");
Assert.assertTrue(backupServer.getServer().getNetworkHealthCheck().isStarted());
backupServer.getServer().addExternalComponent(webServerComponent, false);
// this is called when backup servers go from live back to backup
backupServer.getServer().fail(true);
Assert.assertTrue(backupServer.getServer().getNetworkHealthCheck().isStarted());
Assert.assertTrue(backupServer.getServer().getExternalComponents().get(0).isStarted());
((ServiceComponent) (backupServer.getServer().getExternalComponents().get(0))).stop(true);
} finally {
httpServer.stop(0);
}
}
@Override
protected void createConfigs() throws Exception {
createPluggableReplicatedConfigs();
}
@Override
protected void setupHAPolicyConfiguration() {
((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration())
.setCheckForLiveServer(true);
((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration())
.setMaxSavedReplicatedJournalsSize(2)
.setAllowFailBack(true);
}
@Override
protected TransportConfiguration getAcceptorTransportConfiguration(final boolean live) {
return getNettyAcceptorTransportConfiguration(live);
}
@Override
protected TransportConfiguration getConnectorTransportConfiguration(final boolean live) {
return getNettyConnectorTransportConfiguration(live);
}
@Override
protected void crash(boolean waitFailure, ClientSession... sessions) throws Exception {
if (sessions.length > 0) {
for (ClientSession session : sessions) {
waitForRemoteBackup(session.getSessionFactory(), 5, true, backupServer.getServer());
}
} else {
waitForRemoteBackup(null, 5, true, backupServer.getServer());
}
super.crash(waitFailure, sessions);
}
@Override
protected void crash(ClientSession... sessions) throws Exception {
if (sessions.length > 0) {
for (ClientSession session : sessions) {
waitForRemoteBackup(session.getSessionFactory(), 5, true, backupServer.getServer());
}
} else {
waitForRemoteBackup(null, 5, true, backupServer.getServer());
}
super.crash(sessions);
}
}

View File

@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.tests.integration.cluster.failover.NettyReplicationStopTest;
public class PluggableQuorumNettyReplicationStopTest extends NettyReplicationStopTest {
@Override
protected void createConfigs() throws Exception {
createPluggableReplicatedConfigs();
}
@Override
protected void setupHAPolicyConfiguration() {
((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
}
}

View File

@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.tests.integration.cluster.failover.PageCleanupWhileReplicaCatchupTest;
public class PluggableQuorumPageCleanupWhileReplicaCatchupTest extends PageCleanupWhileReplicaCatchupTest {
@Override
protected void createConfigs() throws Exception {
createPluggableReplicatedConfigs();
}
@Override
protected void setupHAPolicyConfiguration() {
((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
}
}

View File

@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;
import java.io.IOException;
import java.util.Collections;
import org.apache.activemq.artemis.api.core.TransportConfiguration;
import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
import org.apache.activemq.artemis.tests.integration.cluster.failover.ReplicaTimeoutTest;
import org.apache.activemq.artemis.tests.util.ReplicatedBackupUtils;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
public class PluggableQuorumReplicaTimeoutTest extends ReplicaTimeoutTest {
@Rule
public TemporaryFolder tmpFolder = new TemporaryFolder();
@Override
protected void configureReplicationPair(Configuration backupConfig,
Configuration liveConfig,
TransportConfiguration backupConnector,
TransportConfiguration backupAcceptor,
TransportConfiguration liveConnector) throws IOException {
DistributedPrimitiveManagerConfiguration managerConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString()));
ReplicatedBackupUtils.configurePluggableQuorumReplicationPair(backupConfig, backupConnector, backupAcceptor,
liveConfig, liveConnector, null,
managerConfiguration, managerConfiguration);
ReplicationPrimaryPolicyConfiguration primaryConfiguration = ((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration());
primaryConfiguration.setInitialReplicationSyncTimeout(1000);
primaryConfiguration.setCheckForLiveServer(true);
ReplicationBackupPolicyConfiguration backupConfiguration = ((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration());
backupConfiguration.setInitialReplicationSyncTimeout(1000);
backupConfiguration.setMaxSavedReplicatedJournalsSize(2)
.setAllowFailBack(true);
}
@Override
protected boolean expectLiveSuicide() {
return false;
}
}

View File

@ -0,0 +1,27 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;
import org.apache.activemq.artemis.tests.integration.cluster.failover.ReplicatedDistributionTest;
public class PluggableQuorumReplicatedDistributionTest extends ReplicatedDistributionTest {
@Override
protected HAType haType() {
return HAType.PluggableQuorumReplication;
}
}

View File

@ -14,23 +14,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover;
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;
import org.apache.activemq.artemis.api.core.client.ClientSession;
import org.apache.activemq.artemis.core.client.impl.ClientSessionInternal;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.tests.integration.cluster.failover.LargeMessageFailoverTest;
public class ReplicatedLargeMessageFailoverTest extends LargeMessageFailoverTest {
public class PluggableQuorumReplicatedLargeMessageFailoverTest extends LargeMessageFailoverTest {
@Override
protected void createConfigs() throws Exception {
createReplicatedConfigs();
createPluggableReplicatedConfigs();
}
@Override
protected void setupHAPolicyConfiguration() {
((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
}
@Override
protected void crash(boolean waitFailure, ClientSession... sessions) throws Exception {
if (sessions.length > 0) {
for (ClientSession session : sessions) {
waitForRemoteBackup(((ClientSessionInternal) session).getSessionFactory(), 5, true, backupServer.getServer());
waitForRemoteBackup(session.getSessionFactory(), 5, true, backupServer.getServer());
}
} else {
waitForRemoteBackup(null, 5, true, backupServer.getServer());
@ -42,11 +50,12 @@ public class ReplicatedLargeMessageFailoverTest extends LargeMessageFailoverTest
protected void crash(ClientSession... sessions) throws Exception {
if (sessions.length > 0) {
for (ClientSession session : sessions) {
waitForRemoteBackup(((ClientSessionInternal) session).getSessionFactory(), 5, true, backupServer.getServer());
waitForRemoteBackup(session.getSessionFactory(), 5, true, backupServer.getServer());
}
} else {
waitForRemoteBackup(null, 5, true, backupServer.getServer());
}
super.crash(sessions);
}
}

View File

@ -14,17 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover;
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;
import org.apache.activemq.artemis.api.core.client.ClientSession;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.tests.integration.cluster.util.BackupSyncDelay;
import org.junit.After;
import org.junit.Before;
/**
* See {@link BackupSyncDelay} for the rationale about these 'WithDelay' tests.
*/
public class ReplicatedLargeMessageWithDelayFailoverTest extends ReplicatedLargeMessageFailoverTest {
public class PluggableQuorumReplicatedLargeMessageWithDelayFailoverTest extends PluggableQuorumReplicatedLargeMessageFailoverTest {
private BackupSyncDelay syncDelay;
@ -60,10 +59,23 @@ public class ReplicatedLargeMessageWithDelayFailoverTest extends ReplicatedLarge
super.crash(waitFailure, sessions);
}
@Override
protected void createConfigs() throws Exception {
createPluggableReplicatedConfigs();
}
@Override
protected void setupHAPolicyConfiguration() {
((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration())
.setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
}
@Override
@After
public void tearDown() throws Exception {
syncDelay.deliverUpToDateMsg();
super.tearDown();
}
}

View File

@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.cluster.failover.quorum;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.tests.integration.cluster.failover.PagingFailoverTest;
public class PluggableQuorumReplicatedPagingFailoverTest extends PagingFailoverTest {
@Override
protected void createConfigs() throws Exception {
createPluggableReplicatedConfigs();
}
@Override
protected void setupHAPolicyConfiguration() {
((ReplicationPrimaryPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
}
}

View File

@ -32,6 +32,8 @@ import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.Replicatio
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationStartSyncMessage;
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
import org.apache.activemq.artemis.core.server.ActiveMQServer;
import org.apache.activemq.artemis.core.server.impl.Activation;
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
import org.apache.activemq.artemis.spi.core.protocol.RemotingConnection;
@ -94,8 +96,18 @@ public class BackupSyncDelay implements Interceptor {
public boolean intercept(Packet packet, RemotingConnection connection) throws ActiveMQException {
if (packet.getType() == PacketImpl.BACKUP_REGISTRATION) {
try {
SharedNothingBackupActivation activation = (SharedNothingBackupActivation) backup.getActivation();
ReplicationEndpoint repEnd = activation.getReplicationEndpoint();
Activation backupActivation = backup.getActivation();
ReplicationEndpoint repEnd = null;
if (backupActivation instanceof SharedNothingBackupActivation) {
SharedNothingBackupActivation activation = (SharedNothingBackupActivation) backupActivation;
repEnd = activation.getReplicationEndpoint();
} else if (backupActivation instanceof ReplicationBackupActivation) {
ReplicationBackupActivation activation = (ReplicationBackupActivation) backupActivation;
repEnd = activation.getReplicationEndpoint();
}
if (repEnd == null) {
throw new NullPointerException("replication endpoint isn't supposed to be null");
}
handler.addSubHandler(repEnd);
Channel repChannel = repEnd.getChannel();
repChannel.setHandler(handler);

View File

@ -78,7 +78,7 @@ public class OpenWireProtocolManagerTest extends ActiveMQTestBase {
@Override
public ClusterManager getClusterManager() {
return new ClusterManager(getExecutorFactory(), this, null, null, null, null, null, false);
return new ClusterManager(getExecutorFactory(), this, null, null, null, null, null, true);
}
@Override

View File

@ -0,0 +1,60 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.tests.integration.replication;
import java.io.IOException;
import java.util.Collections;
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
public class PluggableQuorumReplicationFlowControlTest extends SharedNothingReplicationFlowControlTest {
private DistributedPrimitiveManagerConfiguration managerConfiguration;
@Rule
public TemporaryFolder tmpFolder = new TemporaryFolder();
@Before
public void init() throws IOException {
managerConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), Collections.singletonMap("locks-folder", tmpFolder.newFolder("manager").toString()));
}
@Override
protected HAPolicyConfiguration createReplicationBackupConfiguration() {
ReplicationBackupPolicyConfiguration haPolicy = ReplicationBackupPolicyConfiguration.withDefault();
haPolicy.setDistributedManagerConfiguration(managerConfiguration);
haPolicy.setClusterName("cluster");
// fail fast in order to let the backup quickly retry syncing with the primary
haPolicy.setVoteRetries(0);
return haPolicy;
}
@Override
protected HAPolicyConfiguration createReplicationLiveConfiguration() {
ReplicationPrimaryPolicyConfiguration haPolicy = ReplicationPrimaryPolicyConfiguration.withDefault();
haPolicy.setDistributedManagerConfiguration(managerConfiguration);
haPolicy.setCheckForLiveServer(false);
return haPolicy;
}
}
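
For reference only, the pluggable-quorum policy pair exercised by the tests above can also be assembled programmatically with the same APIs that appear in these hunks. The sketch below is not part of the commit: the locks-folder path, the liveConfig/backupConfig variables and the wrapper class are assumptions made for illustration, while the configuration classes, the withDefault() factories and the setters are taken from the diff.

import java.util.Collections;

import org.apache.activemq.artemis.core.config.Configuration;
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;

public final class PluggableQuorumWiringSketch {

   public static void configure(Configuration liveConfig, Configuration backupConfig) {
      // Shared distributed-primitive manager: the file-based implementation used by the tests,
      // pointed at a hypothetical locks folder.
      DistributedPrimitiveManagerConfiguration manager = new DistributedPrimitiveManagerConfiguration(
         FileBasedPrimitiveManager.class.getName(),
         Collections.singletonMap("locks-folder", "/tmp/quorum-locks"));

      // Primary (live) side of the pair.
      ReplicationPrimaryPolicyConfiguration primary = ReplicationPrimaryPolicyConfiguration.withDefault();
      primary.setDistributedManagerConfiguration(manager);
      primary.setCheckForLiveServer(true);

      // Backup side of the pair.
      ReplicationBackupPolicyConfiguration backup = ReplicationBackupPolicyConfiguration.withDefault();
      backup.setDistributedManagerConfiguration(manager);
      backup.setClusterName("cluster");
      backup.setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);

      liveConfig.setHAPolicyConfiguration(primary);
      backupConfig.setHAPolicyConfiguration(backup);
   }
}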

Some files were not shown because too many files have changed in this diff.