This closes #3680
This commit is contained in:
commit
c9f001215f
|
@ -19,7 +19,7 @@ package org.apache.activemq.artemis.cli.commands;
|
|||
import java.io.File;
|
||||
import java.util.Timer;
|
||||
import java.util.TimerTask;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
import io.airlift.airline.Command;
|
||||
import io.airlift.airline.Option;
|
||||
|
@ -71,6 +71,7 @@ public class Run extends LockAbstract {
|
|||
public Object execute(ActionContext context) throws Exception {
|
||||
super.execute(context);
|
||||
|
||||
AtomicReference<Throwable> serverActivationFailed = new AtomicReference<>();
|
||||
try {
|
||||
BrokerDTO broker = getBrokerDTO();
|
||||
ActiveMQSecurityManager securityManager = SecurityManagerFactory.create(broker.security);
|
||||
|
@ -110,8 +111,7 @@ public class Run extends LockAbstract {
|
|||
server = BrokerFactory.createServer(broker.server, securityManager, activateCallback);
|
||||
|
||||
server.createComponents();
|
||||
AtomicBoolean serverActivationFailed = new AtomicBoolean(false);
|
||||
server.getServer().registerActivationFailureListener(exception -> serverActivationFailed.set(true));
|
||||
server.getServer().registerActivationFailureListener(exception -> serverActivationFailed.set(exception));
|
||||
server.start();
|
||||
server.getServer().addExternalComponent(managementContext, false);
|
||||
|
||||
|
@ -126,14 +126,16 @@ public class Run extends LockAbstract {
|
|||
server.getServer().addExternalComponent(component, true);
|
||||
assert component.isStarted();
|
||||
}
|
||||
|
||||
if (serverActivationFailed.get()) {
|
||||
stop();
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
t.printStackTrace();
|
||||
stop();
|
||||
serverActivationFailed.set(t);
|
||||
}
|
||||
|
||||
if (serverActivationFailed.get() != null) {
|
||||
stop();
|
||||
return serverActivationFailed.get();
|
||||
}
|
||||
|
||||
return new Pair<>(managementContext, server.getServer());
|
||||
}
|
||||
|
||||
|
|
|
@ -2851,4 +2851,12 @@ public interface AuditLogger extends BasicLogger {
|
|||
@LogMessage(level = Logger.Level.INFO)
|
||||
@Message(id = 601748, value = "User {0} is getting max retry interval on target resource: {1} {2}", format = Message.Format.MESSAGE_FORMAT)
|
||||
void getMaxRetryInterval(String user, Object source, Object... args);
|
||||
|
||||
/**
 * Static entry point for the "getting activation sequence" audit message: forwards to
 * {@code BASE_LOGGER.getActivationSequence}, passing {@code getCaller()} as the user argument.
 *
 * @param source the value passed through as the target resource of the audit message
 */
static void getActivationSequence(Object source) {
|
||||
BASE_LOGGER.getActivationSequence(getCaller(), source);
|
||||
}
|
||||
|
||||
@LogMessage(level = Logger.Level.INFO)
|
||||
@Message(id = 601749, value = "User {0} is getting activation sequence on target resource: {1} {2}", format = Message.Format.MESSAGE_FORMAT)
|
||||
void getActivationSequence(String user, Object source, Object... args);
|
||||
}
|
||||
|
|
|
@ -270,6 +270,18 @@ public class ThreadLeakCheckRule extends TestWatcher {
|
|||
} else if (threadName.contains("ObjectCleanerThread")) {
|
||||
// Required since upgrade to Netty 4.1.22 maybe because https://github.com/netty/netty/commit/739e70398ccb6b11ffa97c6b5f8d55e455a2165e
|
||||
return true;
|
||||
} else if (threadName.contains("RMI TCP")) {
|
||||
return true;
|
||||
} else if (threadName.contains("RMI Scheduler")) {
|
||||
return true;
|
||||
} else if (threadName.contains("RMI RenewClean")) {
|
||||
return true;
|
||||
} else if (threadName.contains("Signal Dispatcher")) {
|
||||
return true;
|
||||
} else if (threadName.contains("ForkJoinPool.commonPool")) {
|
||||
return true;
|
||||
} else if (threadName.contains("GC Daemon")) {
|
||||
return true;
|
||||
} else {
|
||||
for (StackTraceElement element : thread.getStackTrace()) {
|
||||
if (element.getClassName().contains("org.jboss.byteman.agent.TransformListener")) {
|
||||
|
|
|
@ -264,6 +264,9 @@ public final class ActiveMQDefaultConfiguration {
|
|||
// the directory to store the journal files in
|
||||
private static String DEFAULT_JOURNAL_DIR = "data/journal";
|
||||
|
||||
// the directory to store the data files in
|
||||
private static String DEFAULT_DATA_DIR = "data";
|
||||
|
||||
// true means that the journal directory will be created
|
||||
private static boolean DEFAULT_CREATE_JOURNAL_DIR = true;
|
||||
|
||||
|
@ -627,6 +630,8 @@ public final class ActiveMQDefaultConfiguration {
|
|||
|
||||
public static final String DEFAULT_TEMPORARY_QUEUE_NAMESPACE = "";
|
||||
|
||||
private static final String DEFAULT_DISTRIBUTED_PRIMITIVE_MANAGER_CLASS_NAME = "org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager";
|
||||
|
||||
// Number of concurrent workers for a core bridge
|
||||
public static int DEFAULT_BRIDGE_CONCURRENCY = 1;
|
||||
|
||||
|
@ -938,6 +943,13 @@ public final class ActiveMQDefaultConfiguration {
|
|||
return DEFAULT_JOURNAL_DIR;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the value of DEFAULT_DATA_DIR: the directory to store the data files in.
|
||||
*/
|
||||
public static String getDefaultDataDir() {
|
||||
return DEFAULT_DATA_DIR;
|
||||
}
|
||||
|
||||
/**
|
||||
* true means that the journal directory will be created
|
||||
*/
|
||||
|
@ -1721,6 +1733,10 @@ public final class ActiveMQDefaultConfiguration {
|
|||
return DEFAULT_TEMPORARY_QUEUE_NAMESPACE;
|
||||
}
|
||||
|
||||
/**
 * Returns the value of DEFAULT_DISTRIBUTED_PRIMITIVE_MANAGER_CLASS_NAME, i.e. the fully qualified
 * class name of the distributed primitive manager used by default.
 */
public static String getDefaultDistributedPrimitiveManagerClassName() {
|
||||
return DEFAULT_DISTRIBUTED_PRIMITIVE_MANAGER_CLASS_NAME;
|
||||
}
|
||||
|
||||
/**
 * Returns the value of DEFAULT_BRIDGE_CONCURRENCY: the number of concurrent workers for a core bridge.
 */
public static int getDefaultBridgeConcurrency() {
|
||||
return DEFAULT_BRIDGE_CONCURRENCY;
|
||||
}
|
||||
|
|
|
@ -321,6 +321,15 @@ public interface ActiveMQServerControl {
|
|||
@Attribute(desc = "Node ID of this server")
|
||||
String getNodeID();
|
||||
|
||||
|
||||
/**
|
||||
* Returns the current activation sequence number of this server.
|
||||
* <br>
|
||||
* When replicated, peers may coordinate activation with this monotonic sequence
|
||||
*/
|
||||
@Attribute(desc = "Activation sequence of this server instance")
|
||||
long getActivationSequence();
|
||||
|
||||
/**
|
||||
* Returns the management notification address of this server.
|
||||
* <br>
|
||||
|
|
|
@ -231,6 +231,17 @@
|
|||
<version>${project.version}</version>
|
||||
<classifier>javadoc</classifier>
|
||||
</dependency>
|
||||
<!-- quorum -->
|
||||
<dependency>
|
||||
<groupId>org.apache.activemq</groupId>
|
||||
<artifactId>artemis-quorum-api</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.activemq</groupId>
|
||||
<artifactId>artemis-quorum-ri</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.netty</groupId>
|
||||
<artifactId>netty-all</artifactId>
|
||||
|
|
|
@ -62,6 +62,9 @@
|
|||
<include>org.apache.activemq.rest:artemis-rest</include>
|
||||
<include>org.apache.qpid:qpid-jms-client</include>
|
||||
<include>io.micrometer:micrometer-core</include>
|
||||
<!-- quorum -->
|
||||
<include>org.apache.activemq:artemis-quorum-api</include>
|
||||
<include>org.apache.activemq:artemis-quorum-ri</include>
|
||||
|
||||
<!-- dependencies -->
|
||||
<include>jakarta.jms:jakarta.jms-api</include>
|
||||
|
@ -97,6 +100,12 @@
|
|||
<include>com.sun.xml.bind:jaxb-impl</include>
|
||||
<include>jakarta.activation:jakarta.activation-api</include>
|
||||
<include>jakarta.security.auth.message:jakarta.security.auth.message-api</include>
|
||||
<!-- quorum -->
|
||||
<include>org.apache.curator:curator-recipes</include>
|
||||
<include>org.apache.curator:curator-client</include>
|
||||
<include>org.apache.curator:curator-framework</include>
|
||||
<include>org.apache.zookeeper:zookeeper</include>
|
||||
<include>org.apache.zookeeper:zookeeper-jute</include>
|
||||
</includes>
|
||||
<!--excludes>
|
||||
<exclude>org.apache.activemq:artemis-website</exclude>
|
||||
|
|
|
@ -81,6 +81,7 @@
|
|||
<!--bundle dependency="true">mvn:io.micrometer/micrometer-core/${version.micrometer}</bundle-->
|
||||
|
||||
<bundle>mvn:org.apache.activemq/activemq-artemis-native/${activemq-artemis-native-version}</bundle>
|
||||
<bundle>mvn:org.apache.activemq/artemis-quorum-api/${pom.version}</bundle>
|
||||
<bundle>mvn:org.apache.activemq/artemis-server-osgi/${pom.version}</bundle>
|
||||
</feature>
|
||||
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>org.apache.activemq</groupId>
|
||||
<artifactId>artemis-pom</artifactId>
|
||||
<version>2.18.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>artemis-quorum-api</artifactId>
|
||||
<packaging>bundle</packaging>
|
||||
<name>ActiveMQ Artemis Quorum API</name>
|
||||
|
||||
<properties>
|
||||
<activemq.basedir>${project.basedir}/..</activemq.basedir>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.google.errorprone</groupId>
|
||||
<artifactId>error_prone_core</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.quorum;
|
||||
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.locks.LockSupport;
|
||||
|
||||
public interface DistributedLock extends AutoCloseable {
|
||||
|
||||
String getLockId();
|
||||
|
||||
boolean isHeldByCaller() throws UnavailableStateException;
|
||||
|
||||
boolean tryLock() throws UnavailableStateException, InterruptedException;
|
||||
|
||||
default boolean tryLock(long timeout, TimeUnit unit) throws UnavailableStateException, InterruptedException {
|
||||
// it doesn't make sense to be super fast
|
||||
final long TARGET_FIRE_PERIOD_NS = TimeUnit.MILLISECONDS.toNanos(250);
|
||||
if (timeout < 0) {
|
||||
throw new IllegalArgumentException("timeout cannot be negative");
|
||||
}
|
||||
Objects.requireNonNull(unit);
|
||||
if (timeout == 0) {
|
||||
return tryLock();
|
||||
}
|
||||
final Thread currentThread = Thread.currentThread();
|
||||
final long timeoutNs = unit.toNanos(timeout);
|
||||
final long start = System.nanoTime();
|
||||
final long deadline = start + timeoutNs;
|
||||
long expectedNextFireTime = start;
|
||||
while (!currentThread.isInterrupted()) {
|
||||
long parkNs = expectedNextFireTime - System.nanoTime();
|
||||
while (parkNs > 0) {
|
||||
LockSupport.parkNanos(parkNs);
|
||||
if (currentThread.isInterrupted()) {
|
||||
throw new InterruptedException();
|
||||
}
|
||||
final long now = System.nanoTime();
|
||||
parkNs = expectedNextFireTime - now;
|
||||
}
|
||||
if (tryLock()) {
|
||||
return true;
|
||||
}
|
||||
final long now = System.nanoTime();
|
||||
final long remainingTime = deadline - now;
|
||||
if (remainingTime <= 0) {
|
||||
return false;
|
||||
}
|
||||
if (remainingTime < TARGET_FIRE_PERIOD_NS) {
|
||||
expectedNextFireTime = now;
|
||||
} else {
|
||||
expectedNextFireTime += TARGET_FIRE_PERIOD_NS;
|
||||
}
|
||||
}
|
||||
throw new InterruptedException();
|
||||
}
|
||||
|
||||
void unlock() throws UnavailableStateException;
|
||||
|
||||
void addListener(UnavailableLockListener listener);
|
||||
|
||||
void removeListener(UnavailableLockListener listener);
|
||||
|
||||
@FunctionalInterface
|
||||
interface UnavailableLockListener {
|
||||
|
||||
void onUnavailableLockEvent();
|
||||
}
|
||||
|
||||
@Override
|
||||
void close();
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.quorum;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
public interface DistributedPrimitiveManager extends AutoCloseable {
|
||||
|
||||
static DistributedPrimitiveManager newInstanceOf(String className, Map<String, String> properties) throws Exception {
|
||||
return (DistributedPrimitiveManager) Class.forName(className).getDeclaredConstructor(Map.class).newInstance(properties);
|
||||
}
|
||||
|
||||
@FunctionalInterface
|
||||
interface UnavailableManagerListener {
|
||||
|
||||
void onUnavailableManagerEvent();
|
||||
}
|
||||
|
||||
void addUnavailableManagerListener(UnavailableManagerListener listener);
|
||||
|
||||
void removeUnavailableManagerListener(UnavailableManagerListener listener);
|
||||
|
||||
boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException;
|
||||
|
||||
void start() throws InterruptedException, ExecutionException;
|
||||
|
||||
boolean isStarted();
|
||||
|
||||
void stop();
|
||||
|
||||
DistributedLock getDistributedLock(String lockId) throws InterruptedException, ExecutionException, TimeoutException;
|
||||
|
||||
MutableLong getMutableLong(String mutableLongId) throws InterruptedException, ExecutionException, TimeoutException;
|
||||
|
||||
@Override
|
||||
default void close() {
|
||||
stop();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.quorum;
|
||||
|
||||
public interface MutableLong extends AutoCloseable {
|
||||
|
||||
String getMutableLongId();
|
||||
|
||||
long get() throws UnavailableStateException;
|
||||
|
||||
void set(long value) throws UnavailableStateException;
|
||||
|
||||
/**
|
||||
* This is not meant to be atomic; it's semantically equivalent to:
|
||||
* <pre>
|
||||
* long oldValue = mutableLong.get();
|
||||
* if (mutableLong.oldValue != expectedValue) {
|
||||
* return false;
|
||||
* }
|
||||
* mutableLong.set(newValue);
|
||||
* return true;
|
||||
* </pre>
|
||||
*/
|
||||
default boolean compareAndSet(long expectedValue, long newValue) throws UnavailableStateException {
|
||||
final long oldValue = get();
|
||||
if (oldValue != expectedValue) {
|
||||
return false;
|
||||
}
|
||||
set(newValue);
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
void close();
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.quorum;
|
||||
|
||||
/**
 * Checked exception declared by the {@code DistributedLock} and {@code MutableLong} operations
 * of this package. It only exposes the four standard {@link Exception} constructors.
 */
public final class UnavailableStateException extends Exception {
|
||||
|
||||
/** Creates an exception with neither a detail message nor a cause. */
public UnavailableStateException() {
|
||||
super();
|
||||
}
|
||||
|
||||
/** Creates an exception with the given detail {@code message}. */
public UnavailableStateException(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
/** Creates an exception with the given detail {@code message} and {@code cause}. */
public UnavailableStateException(String message, Throwable cause) {
|
||||
super(message, cause);
|
||||
}
|
||||
|
||||
/** Creates an exception wrapping the given {@code cause}. */
public UnavailableStateException(Throwable cause) {
|
||||
super(cause);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,134 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>org.apache.activemq</groupId>
|
||||
<artifactId>artemis-pom</artifactId>
|
||||
<version>2.18.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>artemis-quorum-ri</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>ActiveMQ Artemis Quorum RI</name>
|
||||
|
||||
<properties>
|
||||
<activemq.basedir>${project.basedir}/..</activemq.basedir>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.curator</groupId>
|
||||
<artifactId>curator-recipes</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.curator</groupId>
|
||||
<artifactId>curator-client</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.zookeeper</groupId>
|
||||
<artifactId>zookeeper</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.curator</groupId>
|
||||
<artifactId>curator-test</artifactId>
|
||||
<version>${curator.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.activemq</groupId>
|
||||
<artifactId>artemis-quorum-api</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.jboss.logging</groupId>
|
||||
<artifactId>jboss-logging</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.jboss.slf4j</groupId>
|
||||
<artifactId>slf4j-jboss-logmanager</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.activemq</groupId>
|
||||
<artifactId>artemis-commons</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.errorprone</groupId>
|
||||
<artifactId>error_prone_core</artifactId>
|
||||
</dependency>
|
||||
<!-- tests -->
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.hamcrest</groupId>
|
||||
<artifactId>hamcrest</artifactId>
|
||||
<version>${hamcrest.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- test logging -->
|
||||
<dependency>
|
||||
<groupId>org.jboss.logging</groupId>
|
||||
<artifactId>jboss-logging-processor</artifactId>
|
||||
<scope>provided</scope>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.jboss.logmanager</groupId>
|
||||
<artifactId>jboss-logmanager</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.wildfly.common</groupId>
|
||||
<artifactId>wildfly-common</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.activemq</groupId>
|
||||
<artifactId>artemis-commons</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<scope>test</scope>
|
||||
<type>test-jar</type>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>test</phase>
|
||||
<goals>
|
||||
<goal>test-jar</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
|
@ -0,0 +1,183 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.quorum.file;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.activemq.artemis.quorum.DistributedLock;
|
||||
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
|
||||
import org.apache.activemq.artemis.quorum.MutableLong;
|
||||
import org.apache.activemq.artemis.quorum.UnavailableStateException;
|
||||
|
||||
/**
|
||||
* This is an implementation suitable to be used just on unit tests and it won't attempt
|
||||
* to manage nor purge existing stale locks files. It's part of the tests life-cycle to properly
|
||||
* set-up and tear-down the environment.
|
||||
*/
|
||||
public class FileBasedPrimitiveManager implements DistributedPrimitiveManager {
|
||||
|
||||
private final File locksFolder;
|
||||
private final Map<String, FileDistributedLock> locks;
|
||||
private boolean started;
|
||||
|
||||
public FileBasedPrimitiveManager(Map<String, String> args) {
|
||||
this(new File(args.get("locks-folder")));
|
||||
}
|
||||
|
||||
public FileBasedPrimitiveManager(File locksFolder) {
|
||||
Objects.requireNonNull(locksFolder);
|
||||
if (!locksFolder.exists()) {
|
||||
throw new IllegalStateException(locksFolder + " is supposed to already exists");
|
||||
}
|
||||
if (!locksFolder.isDirectory()) {
|
||||
throw new IllegalStateException(locksFolder + " is supposed to be a directory");
|
||||
}
|
||||
this.locksFolder = locksFolder;
|
||||
this.locks = new HashMap<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isStarted() {
|
||||
return started;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addUnavailableManagerListener(UnavailableManagerListener listener) {
|
||||
// noop
|
||||
}
|
||||
|
||||
@Override
|
||||
public void removeUnavailableManagerListener(UnavailableManagerListener listener) {
|
||||
// noop
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException {
|
||||
if (timeout >= 0) {
|
||||
Objects.requireNonNull(unit);
|
||||
}
|
||||
if (started) {
|
||||
return true;
|
||||
}
|
||||
started = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start() throws InterruptedException, ExecutionException {
|
||||
start(-1, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stop() {
|
||||
if (!started) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
locks.forEach((lockId, lock) -> {
|
||||
try {
|
||||
lock.close(false);
|
||||
} catch (Throwable t) {
|
||||
// TODO no op for now: log would be better!
|
||||
}
|
||||
});
|
||||
locks.clear();
|
||||
} finally {
|
||||
started = false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public DistributedLock getDistributedLock(String lockId) throws ExecutionException {
|
||||
Objects.requireNonNull(lockId);
|
||||
if (!started) {
|
||||
throw new IllegalStateException("manager should be started first");
|
||||
}
|
||||
final FileDistributedLock lock = locks.get(lockId);
|
||||
if (lock != null && !lock.isClosed()) {
|
||||
return lock;
|
||||
}
|
||||
try {
|
||||
final FileDistributedLock newLock = new FileDistributedLock(locks::remove, locksFolder, lockId);
|
||||
locks.put(lockId, newLock);
|
||||
return newLock;
|
||||
} catch (IOException ioEx) {
|
||||
throw new ExecutionException(ioEx);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public MutableLong getMutableLong(final String mutableLongId) throws ExecutionException {
|
||||
// use a lock file - but with a prefix
|
||||
final FileDistributedLock fileDistributedLock = (FileDistributedLock) getDistributedLock("ML:" + mutableLongId);
|
||||
return new MutableLong() {
|
||||
@Override
|
||||
public String getMutableLongId() {
|
||||
return mutableLongId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get() throws UnavailableStateException {
|
||||
try {
|
||||
return readLong(fileDistributedLock);
|
||||
} catch (IOException e) {
|
||||
throw new UnavailableStateException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void set(long value) throws UnavailableStateException {
|
||||
try {
|
||||
writeLong(fileDistributedLock, value);
|
||||
} catch (IOException e) {
|
||||
throw new UnavailableStateException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
fileDistributedLock.close();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private void writeLong(FileDistributedLock fileDistributedLock, long value) throws IOException {
|
||||
ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
|
||||
buffer.putLong(value);
|
||||
buffer.flip();
|
||||
if (fileDistributedLock.getChannel().position(0).write(buffer) == Long.BYTES) {
|
||||
fileDistributedLock.getChannel().force(false);
|
||||
}
|
||||
}
|
||||
|
||||
private long readLong(FileDistributedLock fileDistributedLock) throws IOException {
|
||||
ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
|
||||
if (fileDistributedLock.getChannel().position(0).read(buffer, 0) != Long.BYTES) {
|
||||
return 0;
|
||||
}
|
||||
buffer.flip();
|
||||
return buffer.getLong();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,145 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.quorum.file;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.channels.FileLock;
|
||||
import java.nio.channels.OverlappingFileLockException;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import org.apache.activemq.artemis.quorum.DistributedLock;
|
||||
|
||||
final class FileDistributedLock implements DistributedLock {
|
||||
|
||||
private final String lockId;
|
||||
private final Consumer<String> onClosedLock;
|
||||
private boolean closed;
|
||||
private FileLock fileLock;
|
||||
private final FileChannel channel;
|
||||
|
||||
FileDistributedLock(Consumer<String> onClosedLock, File locksFolder, String lockId) throws IOException {
|
||||
this.onClosedLock = onClosedLock;
|
||||
this.lockId = lockId;
|
||||
this.closed = false;
|
||||
this.fileLock = null;
|
||||
this.channel = FileChannel.open(new File(locksFolder, lockId).toPath(), StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE);
|
||||
}
|
||||
|
||||
private void checkNotClosed() {
|
||||
if (closed) {
|
||||
throw new IllegalStateException("This lock is closed");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getLockId() {
|
||||
checkNotClosed();
|
||||
return lockId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isHeldByCaller() {
|
||||
checkNotClosed();
|
||||
final FileLock fileLock = this.fileLock;
|
||||
if (fileLock == null) {
|
||||
return false;
|
||||
}
|
||||
return fileLock.isValid();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean tryLock() {
|
||||
checkNotClosed();
|
||||
final FileLock fileLock = this.fileLock;
|
||||
if (fileLock != null) {
|
||||
throw new IllegalStateException("unlock first");
|
||||
}
|
||||
final FileLock lock;
|
||||
try {
|
||||
lock = channel.tryLock();
|
||||
} catch (OverlappingFileLockException o) {
|
||||
// this process already hold this lock, but not this manager
|
||||
return false;
|
||||
} catch (Throwable t) {
|
||||
throw new IllegalStateException(t);
|
||||
}
|
||||
if (lock == null) {
|
||||
return false;
|
||||
}
|
||||
this.fileLock = lock;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void unlock() {
|
||||
checkNotClosed();
|
||||
final FileLock fileLock = this.fileLock;
|
||||
if (fileLock != null) {
|
||||
this.fileLock = null;
|
||||
try {
|
||||
fileLock.close();
|
||||
} catch (IOException e) {
|
||||
// noop
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addListener(UnavailableLockListener listener) {
|
||||
checkNotClosed();
|
||||
// noop
|
||||
}
|
||||
|
||||
@Override
|
||||
public void removeListener(UnavailableLockListener listener) {
|
||||
checkNotClosed();
|
||||
// noop
|
||||
}
|
||||
|
||||
public boolean isClosed() {
|
||||
return closed;
|
||||
}
|
||||
|
||||
public void close(boolean useCallback) {
|
||||
if (closed) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
if (useCallback) {
|
||||
onClosedLock.accept(lockId);
|
||||
}
|
||||
unlock();
|
||||
channel.close();
|
||||
} catch (IOException e) {
|
||||
// ignore it
|
||||
} finally {
|
||||
closed = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
close(true);
|
||||
}
|
||||
|
||||
public FileChannel getChannel() {
|
||||
return channel;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,171 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.quorum.zookeeper;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.CopyOnWriteArrayList;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.activemq.artemis.quorum.DistributedLock;
|
||||
import org.apache.activemq.artemis.quorum.UnavailableStateException;
|
||||
import org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager.PrimitiveId;
|
||||
import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreV2;
|
||||
import org.apache.curator.framework.recipes.locks.Lease;
|
||||
|
||||
final class CuratorDistributedLock extends CuratorDistributedPrimitive implements DistributedLock {
|
||||
|
||||
private final InterProcessSemaphoreV2 ipcSem;
|
||||
private final CopyOnWriteArrayList<UnavailableLockListener> listeners;
|
||||
private Lease lease;
|
||||
private byte[] leaseVersion;
|
||||
|
||||
CuratorDistributedLock(PrimitiveId id, CuratorDistributedPrimitiveManager manager, InterProcessSemaphoreV2 ipcSem) {
|
||||
super(id, manager);
|
||||
this.ipcSem = ipcSem;
|
||||
this.listeners = new CopyOnWriteArrayList<>();
|
||||
this.leaseVersion = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void handleReconnected() {
|
||||
super.handleReconnected();
|
||||
if (leaseVersion != null) {
|
||||
assert lease != null;
|
||||
try {
|
||||
if (Arrays.equals(lease.getData(), leaseVersion)) {
|
||||
return;
|
||||
}
|
||||
onLost();
|
||||
} catch (Exception e) {
|
||||
onLost();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void handleLost() {
|
||||
super.handleLost();
|
||||
lease = null;
|
||||
leaseVersion = null;
|
||||
for (UnavailableLockListener listener : listeners) {
|
||||
listener.onUnavailableLockEvent();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getLockId() {
|
||||
return getId().id;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isHeldByCaller() throws UnavailableStateException {
|
||||
return run(() -> {
|
||||
checkUnavailable();
|
||||
if (lease == null) {
|
||||
return false;
|
||||
}
|
||||
assert leaseVersion != null;
|
||||
try {
|
||||
return Arrays.equals(lease.getData(), leaseVersion);
|
||||
} catch (Throwable t) {
|
||||
throw new UnavailableStateException(t);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean tryLock() throws UnavailableStateException, InterruptedException {
|
||||
return tryRun(() -> {
|
||||
if (lease != null) {
|
||||
throw new IllegalStateException("unlock first");
|
||||
}
|
||||
checkUnavailable();
|
||||
try {
|
||||
final byte[] leaseVersion = UUID.randomUUID().toString().getBytes(StandardCharsets.UTF_8);
|
||||
ipcSem.setNodeData(leaseVersion);
|
||||
lease = ipcSem.acquire(0, TimeUnit.NANOSECONDS);
|
||||
if (lease == null) {
|
||||
ipcSem.setNodeData(null);
|
||||
return false;
|
||||
}
|
||||
this.leaseVersion = leaseVersion;
|
||||
assert Arrays.equals(lease.getData(), leaseVersion);
|
||||
return true;
|
||||
} catch (InterruptedException ie) {
|
||||
throw ie;
|
||||
} catch (Throwable e) {
|
||||
throw new UnavailableStateException(e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public void unlock() throws UnavailableStateException {
|
||||
run(() -> {
|
||||
checkUnavailable();
|
||||
final Lease lease = this.lease;
|
||||
if (lease != null) {
|
||||
this.lease = null;
|
||||
this.leaseVersion = null;
|
||||
try {
|
||||
ipcSem.returnLease(lease);
|
||||
} catch (Throwable e) {
|
||||
throw new UnavailableStateException(e);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addListener(UnavailableLockListener listener) {
|
||||
run(() -> {
|
||||
listeners.add(listener);
|
||||
fireUnavailableListener(listener::onUnavailableLockEvent);
|
||||
return null;
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public void removeListener(UnavailableLockListener listener) {
|
||||
run(() -> {
|
||||
listeners.remove(listener);
|
||||
return null;
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void handleClosed() {
|
||||
super.handleClosed();
|
||||
listeners.clear();
|
||||
final Lease lease = this.lease;
|
||||
if (lease == null) {
|
||||
return;
|
||||
}
|
||||
this.lease = null;
|
||||
if (isUnavailable()) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
ipcSem.returnLease(lease);
|
||||
} catch (Throwable t) {
|
||||
// TODO silent, but debug ;)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,172 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.quorum.zookeeper;
|
||||
|
||||
import org.apache.activemq.artemis.quorum.UnavailableStateException;
|
||||
import org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager.PrimitiveId;
|
||||
|
||||
import static org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager.PrimitiveType.validatePrimitiveInstance;
|
||||
|
||||
public abstract class CuratorDistributedPrimitive implements AutoCloseable {
|
||||
|
||||
// this is used to prevent deadlocks on close
|
||||
private final CuratorDistributedPrimitiveManager manager;
|
||||
private final PrimitiveId id;
|
||||
|
||||
private boolean unavailable;
|
||||
private boolean closed;
|
||||
|
||||
protected CuratorDistributedPrimitive(PrimitiveId id, CuratorDistributedPrimitiveManager manager) {
|
||||
this.id = id;
|
||||
this.manager = manager;
|
||||
this.closed = false;
|
||||
this.unavailable = false;
|
||||
validatePrimitiveInstance(this);
|
||||
}
|
||||
|
||||
final PrimitiveId getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
final void onReconnected() {
|
||||
synchronized (manager) {
|
||||
if (closed || unavailable) {
|
||||
return;
|
||||
}
|
||||
handleReconnected();
|
||||
}
|
||||
}
|
||||
|
||||
protected void handleReconnected() {
|
||||
|
||||
}
|
||||
|
||||
final void onLost() {
|
||||
synchronized (manager) {
|
||||
if (closed || unavailable) {
|
||||
return;
|
||||
}
|
||||
unavailable = true;
|
||||
handleLost();
|
||||
}
|
||||
}
|
||||
|
||||
protected void handleLost() {
|
||||
|
||||
}
|
||||
|
||||
final void onSuspended() {
|
||||
synchronized (manager) {
|
||||
if (closed || unavailable) {
|
||||
return;
|
||||
}
|
||||
handleSuspended();
|
||||
}
|
||||
}
|
||||
|
||||
protected void handleSuspended() {
|
||||
|
||||
}
|
||||
|
||||
final void onRemoved() {
|
||||
close(false);
|
||||
}
|
||||
|
||||
private void checkNotClosed() {
|
||||
if (closed) {
|
||||
throw new IllegalStateException("This lock is closed");
|
||||
}
|
||||
}
|
||||
|
||||
@FunctionalInterface
|
||||
protected interface PrimitiveAction<R, T extends Throwable> {
|
||||
|
||||
R call() throws T;
|
||||
}
|
||||
|
||||
@FunctionalInterface
|
||||
protected interface InterruptablePrimitiveAction<R, T extends Throwable> {
|
||||
|
||||
R call() throws InterruptedException, T;
|
||||
}
|
||||
|
||||
protected final void checkUnavailable() throws UnavailableStateException {
|
||||
if (unavailable) {
|
||||
throw new UnavailableStateException(id.type + " with id = " + id.id + " isn't available");
|
||||
}
|
||||
}
|
||||
|
||||
protected final void fireUnavailableListener(Runnable task) {
|
||||
run(() -> {
|
||||
if (!unavailable) {
|
||||
return false;
|
||||
}
|
||||
manager.startHandlingEvents();
|
||||
try {
|
||||
task.run();
|
||||
} finally {
|
||||
manager.completeHandlingEvents();
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
protected final <R, T extends Throwable> R run(PrimitiveAction<R, T> action) throws T {
|
||||
synchronized (manager) {
|
||||
manager.checkHandlingEvents();
|
||||
checkNotClosed();
|
||||
return action.call();
|
||||
}
|
||||
}
|
||||
|
||||
protected final <R, T extends Throwable> R tryRun(InterruptablePrimitiveAction<R, T> action) throws InterruptedException, T {
|
||||
synchronized (manager) {
|
||||
manager.checkHandlingEvents();
|
||||
checkNotClosed();
|
||||
return action.call();
|
||||
}
|
||||
}
|
||||
|
||||
private void close(boolean remove) {
|
||||
synchronized (manager) {
|
||||
manager.checkHandlingEvents();
|
||||
if (closed) {
|
||||
return;
|
||||
}
|
||||
closed = true;
|
||||
if (remove) {
|
||||
manager.remove(this);
|
||||
}
|
||||
handleClosed();
|
||||
}
|
||||
}
|
||||
|
||||
protected void handleClosed() {
|
||||
|
||||
}
|
||||
|
||||
protected final boolean isUnavailable() {
|
||||
synchronized (manager) {
|
||||
return unavailable;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void close() {
|
||||
close(true);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,367 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.quorum.zookeeper;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CopyOnWriteArrayList;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.activemq.artemis.quorum.DistributedLock;
|
||||
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
|
||||
import org.apache.activemq.artemis.quorum.MutableLong;
|
||||
import org.apache.curator.framework.CuratorFramework;
|
||||
import org.apache.curator.framework.CuratorFrameworkFactory;
|
||||
import org.apache.curator.framework.recipes.atomic.DistributedAtomicLong;
|
||||
import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreV2;
|
||||
import org.apache.curator.framework.state.ConnectionState;
|
||||
import org.apache.curator.framework.state.ConnectionStateListener;
|
||||
import org.apache.curator.retry.RetryForever;
|
||||
import org.apache.curator.retry.RetryNTimes;
|
||||
|
||||
import static java.util.Objects.requireNonNull;
|
||||
import static java.util.stream.Collectors.joining;
|
||||
|
||||
public class CuratorDistributedPrimitiveManager implements DistributedPrimitiveManager, ConnectionStateListener {
|
||||
|
||||
enum PrimitiveType {
|
||||
lock, mutableLong;
|
||||
|
||||
static <T extends CuratorDistributedPrimitive> T validatePrimitiveInstance(T primitive) {
|
||||
if (primitive == null) {
|
||||
return null;
|
||||
}
|
||||
boolean valid = false;
|
||||
switch (primitive.getId().type) {
|
||||
|
||||
case lock:
|
||||
valid = primitive instanceof CuratorDistributedLock;
|
||||
break;
|
||||
case mutableLong:
|
||||
valid = primitive instanceof CuratorMutableLong;
|
||||
break;
|
||||
}
|
||||
if (!valid) {
|
||||
throw new AssertionError("Implementation error: " + primitive.getClass() + " is wrongly considered " + primitive.getId().type);
|
||||
}
|
||||
return primitive;
|
||||
}
|
||||
}
|
||||
|
||||
static final class PrimitiveId {
|
||||
|
||||
final String id;
|
||||
final PrimitiveType type;
|
||||
|
||||
private PrimitiveId(String id, PrimitiveType type) {
|
||||
this.id = requireNonNull(id);
|
||||
this.type = requireNonNull(type);
|
||||
}
|
||||
|
||||
static PrimitiveId of(String id, PrimitiveType type) {
|
||||
return new PrimitiveId(id, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (o == null || getClass() != o.getClass())
|
||||
return false;
|
||||
|
||||
PrimitiveId that = (PrimitiveId) o;
|
||||
|
||||
if (!Objects.equals(id, that.id))
|
||||
return false;
|
||||
return type == that.type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = id != null ? id.hashCode() : 0;
|
||||
result = 31 * result + (type != null ? type.hashCode() : 0);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
private static final String CONNECT_STRING_PARAM = "connect-string";
|
||||
private static final String NAMESPACE_PARAM = "namespace";
|
||||
private static final String SESSION_MS_PARAM = "session-ms";
|
||||
private static final String SESSION_PERCENT_PARAM = "session-percent";
|
||||
private static final String CONNECTION_MS_PARAM = "connection-ms";
|
||||
private static final String RETRIES_PARAM = "retries";
|
||||
private static final String RETRIES_MS_PARAM = "retries-ms";
|
||||
private static final Set<String> VALID_PARAMS = Stream.of(
|
||||
CONNECT_STRING_PARAM,
|
||||
NAMESPACE_PARAM,
|
||||
SESSION_MS_PARAM,
|
||||
SESSION_PERCENT_PARAM,
|
||||
CONNECTION_MS_PARAM,
|
||||
RETRIES_PARAM,
|
||||
RETRIES_MS_PARAM).collect(Collectors.toSet());
|
||||
private static final String VALID_PARAMS_ON_ERROR = VALID_PARAMS.stream().collect(joining(","));
|
||||
// It's 9 times the default ZK tick time ie 2000 ms
|
||||
private static final String DEFAULT_SESSION_TIMEOUT_MS = Integer.toString(18_000);
|
||||
private static final String DEFAULT_CONNECTION_TIMEOUT_MS = Integer.toString(8_000);
|
||||
private static final String DEFAULT_RETRIES = Integer.toString(1);
|
||||
private static final String DEFAULT_RETRIES_MS = Integer.toString(1000);
|
||||
// why 1/3 of the session? https://cwiki.apache.org/confluence/display/CURATOR/TN14
|
||||
private static final String DEFAULT_SESSION_PERCENT = Integer.toString(33);
|
||||
|
||||
private static Map<String, String> validateParameters(Map<String, String> config) {
|
||||
config.forEach((parameterName, ignore) -> validateParameter(parameterName));
|
||||
return config;
|
||||
}
|
||||
|
||||
private static void validateParameter(String parameterName) {
|
||||
if (!VALID_PARAMS.contains(parameterName)) {
|
||||
throw new IllegalArgumentException("non existent parameter " + parameterName + ": accepted list is " + VALID_PARAMS_ON_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
private CuratorFramework client;
|
||||
private final Map<PrimitiveId, CuratorDistributedPrimitive> primitives;
|
||||
private CopyOnWriteArrayList<UnavailableManagerListener> listeners;
|
||||
private boolean unavailable;
|
||||
private boolean handlingEvents;
|
||||
private final CuratorFrameworkFactory.Builder curatorBuilder;
|
||||
|
||||
public CuratorDistributedPrimitiveManager(Map<String, String> config) {
|
||||
this(validateParameters(config), true);
|
||||
}
|
||||
|
||||
private CuratorDistributedPrimitiveManager(Map<String, String> config, boolean ignore) {
|
||||
this(config.get(CONNECT_STRING_PARAM),
|
||||
config.get(NAMESPACE_PARAM),
|
||||
Integer.parseInt(config.getOrDefault(SESSION_MS_PARAM, DEFAULT_SESSION_TIMEOUT_MS)),
|
||||
Integer.parseInt(config.getOrDefault(SESSION_PERCENT_PARAM, DEFAULT_SESSION_PERCENT)),
|
||||
Integer.parseInt(config.getOrDefault(CONNECTION_MS_PARAM, DEFAULT_CONNECTION_TIMEOUT_MS)),
|
||||
Integer.parseInt(config.getOrDefault(RETRIES_PARAM, DEFAULT_RETRIES)),
|
||||
Integer.parseInt(config.getOrDefault(RETRIES_MS_PARAM, DEFAULT_RETRIES_MS)));
|
||||
}
|
||||
|
||||
private CuratorDistributedPrimitiveManager(String connectString,
|
||||
String namespace,
|
||||
int sessionMs,
|
||||
int sessionPercent,
|
||||
int connectionMs,
|
||||
int retries,
|
||||
int retriesMs) {
|
||||
curatorBuilder = CuratorFrameworkFactory.builder()
|
||||
.connectString(connectString)
|
||||
.namespace(namespace)
|
||||
.sessionTimeoutMs(sessionMs)
|
||||
.connectionTimeoutMs(connectionMs)
|
||||
.retryPolicy(retries >= 0 ? new RetryNTimes(retries, retriesMs) : new RetryForever(retriesMs))
|
||||
.simulatedSessionExpirationPercent(sessionPercent);
|
||||
this.primitives = new HashMap<>();
|
||||
this.listeners = null;
|
||||
this.unavailable = false;
|
||||
this.handlingEvents = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean isStarted() {
|
||||
checkHandlingEvents();
|
||||
return client != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void addUnavailableManagerListener(UnavailableManagerListener listener) {
|
||||
checkHandlingEvents();
|
||||
if (listeners == null) {
|
||||
return;
|
||||
}
|
||||
listeners.add(listener);
|
||||
if (unavailable) {
|
||||
startHandlingEvents();
|
||||
try {
|
||||
listener.onUnavailableManagerEvent();
|
||||
} finally {
|
||||
completeHandlingEvents();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void removeUnavailableManagerListener(UnavailableManagerListener listener) {
|
||||
checkHandlingEvents();
|
||||
if (listeners == null) {
|
||||
return;
|
||||
}
|
||||
listeners.remove(listener);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException {
|
||||
checkHandlingEvents();
|
||||
if (timeout >= 0) {
|
||||
if (timeout > Integer.MAX_VALUE) {
|
||||
throw new IllegalArgumentException("curator manager won't support too long timeout ie >" + Integer.MAX_VALUE);
|
||||
}
|
||||
requireNonNull(unit);
|
||||
}
|
||||
if (client != null) {
|
||||
return true;
|
||||
}
|
||||
final CuratorFramework client = curatorBuilder.build();
|
||||
try {
|
||||
client.start();
|
||||
if (!client.blockUntilConnected((int) timeout, unit)) {
|
||||
client.close();
|
||||
return false;
|
||||
}
|
||||
this.client = client;
|
||||
this.listeners = new CopyOnWriteArrayList<>();
|
||||
client.getConnectionStateListenable().addListener(this);
|
||||
return true;
|
||||
} catch (InterruptedException e) {
|
||||
client.close();
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void start() throws InterruptedException, ExecutionException {
|
||||
start(-1, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void stop() {
|
||||
checkHandlingEvents();
|
||||
final CuratorFramework client = this.client;
|
||||
if (client == null) {
|
||||
return;
|
||||
}
|
||||
this.client = null;
|
||||
unavailable = false;
|
||||
listeners.clear();
|
||||
this.listeners = null;
|
||||
client.getConnectionStateListenable().removeListener(this);
|
||||
primitives.forEach((id, primitive) -> {
|
||||
try {
|
||||
primitive.onRemoved();
|
||||
} catch (Throwable t) {
|
||||
// TODO log?
|
||||
}
|
||||
});
|
||||
primitives.clear();
|
||||
client.close();
|
||||
}
|
||||
|
||||
private synchronized <T extends CuratorDistributedPrimitive> T getPrimitive(PrimitiveId id,
|
||||
Function<PrimitiveId, ? extends T> primitiveFactory) {
|
||||
checkHandlingEvents();
|
||||
requireNonNull(id);
|
||||
if (client == null) {
|
||||
throw new IllegalStateException("manager isn't started yet!");
|
||||
}
|
||||
final CuratorDistributedPrimitive primitive = PrimitiveType.validatePrimitiveInstance(primitives.get(id));
|
||||
if (primitive != null) {
|
||||
return (T) primitive;
|
||||
}
|
||||
final T newPrimitive = PrimitiveType.validatePrimitiveInstance(primitiveFactory.apply(id));
|
||||
primitives.put(id, newPrimitive);
|
||||
if (unavailable) {
|
||||
startHandlingEvents();
|
||||
try {
|
||||
newPrimitive.onLost();
|
||||
} finally {
|
||||
completeHandlingEvents();
|
||||
}
|
||||
}
|
||||
return newPrimitive;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DistributedLock getDistributedLock(String lockId) {
|
||||
return getPrimitive(PrimitiveId.of(lockId, PrimitiveType.lock),
|
||||
id -> new CuratorDistributedLock(id, this,
|
||||
new InterProcessSemaphoreV2(client, "/" + id.id + "/locks", 1)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public MutableLong getMutableLong(String mutableLongId) {
|
||||
return getPrimitive(PrimitiveId.of(mutableLongId, PrimitiveType.mutableLong),
|
||||
id -> new CuratorMutableLong(id, this,
|
||||
new DistributedAtomicLong(client, "/" + mutableLongId + "/activation-sequence", new RetryNTimes(0, 0))));
|
||||
}
|
||||
|
||||
protected void startHandlingEvents() {
|
||||
handlingEvents = true;
|
||||
}
|
||||
|
||||
protected void completeHandlingEvents() {
|
||||
handlingEvents = false;
|
||||
}
|
||||
|
||||
protected void checkHandlingEvents() {
|
||||
if (client == null) {
|
||||
return;
|
||||
}
|
||||
if (handlingEvents) {
|
||||
throw new IllegalStateException("UnavailableManagerListener isn't supposed to modify the manager or its primitives on event handling!");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void stateChanged(CuratorFramework client, ConnectionState newState) {
|
||||
if (this.client != client) {
|
||||
return;
|
||||
}
|
||||
if (unavailable) {
|
||||
return;
|
||||
}
|
||||
startHandlingEvents();
|
||||
try {
|
||||
switch (newState) {
|
||||
case LOST:
|
||||
unavailable = true;
|
||||
listeners.forEach(listener -> listener.onUnavailableManagerEvent());
|
||||
primitives.forEach((id, primitive) -> primitive.onLost());
|
||||
break;
|
||||
case RECONNECTED:
|
||||
primitives.forEach((id, primitive) -> primitive.onReconnected());
|
||||
break;
|
||||
case SUSPENDED:
|
||||
primitives.forEach((id, primitive) -> primitive.onSuspended());
|
||||
break;
|
||||
}
|
||||
} finally {
|
||||
completeHandlingEvents();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Used for testing purposes
|
||||
*/
|
||||
public synchronized CuratorFramework getCurator() {
|
||||
checkHandlingEvents();
|
||||
return client;
|
||||
}
|
||||
|
||||
public synchronized void remove(CuratorDistributedPrimitive primitive) {
|
||||
checkHandlingEvents();
|
||||
primitives.remove(primitive.getId());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.quorum.zookeeper;
|
||||
|
||||
import org.apache.activemq.artemis.quorum.MutableLong;
|
||||
import org.apache.activemq.artemis.quorum.UnavailableStateException;
|
||||
import org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager.PrimitiveId;
|
||||
import org.apache.curator.framework.recipes.atomic.AtomicValue;
|
||||
import org.apache.curator.framework.recipes.atomic.DistributedAtomicLong;
|
||||
|
||||
final class CuratorMutableLong extends CuratorDistributedPrimitive implements MutableLong {
|
||||
|
||||
private final DistributedAtomicLong atomicLong;
|
||||
|
||||
CuratorMutableLong(PrimitiveId id, CuratorDistributedPrimitiveManager manager, DistributedAtomicLong atomicLong) {
|
||||
super(id, manager);
|
||||
this.atomicLong = atomicLong;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getMutableLongId() {
|
||||
return getId().id;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get() throws UnavailableStateException {
|
||||
return run(() -> {
|
||||
checkUnavailable();
|
||||
try {
|
||||
AtomicValue<Long> atomicValue = atomicLong.get();
|
||||
if (!atomicValue.succeeded()) {
|
||||
throw new UnavailableStateException("cannot query long " + getId());
|
||||
}
|
||||
return atomicValue.postValue();
|
||||
} catch (Throwable e) {
|
||||
throw new UnavailableStateException(e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public void set(long value) throws UnavailableStateException {
|
||||
run(() -> {
|
||||
checkUnavailable();
|
||||
try {
|
||||
atomicLong.forceSet(value);
|
||||
return null;
|
||||
} catch (Throwable e) {
|
||||
throw new UnavailableStateException(e);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
|
@ -0,0 +1,298 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.quorum;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
|
||||
|
||||
public abstract class DistributedLockTest {
|
||||
|
||||
private final ArrayList<AutoCloseable> closeables = new ArrayList<>();
|
||||
|
||||
@Before
|
||||
public void setupEnv() throws Throwable {
|
||||
}
|
||||
|
||||
/**
 * Fills {@code config} with the settings of the manager under test; it is called on a fresh, empty
 * map each time a manager is created by {@code createManagedDistributeManager}.
 */
protected abstract void configureManager(Map<String, String> config);
|
||||
|
||||
/**
 * @return the class name handed to {@code DistributedPrimitiveManager.newInstanceOf} to create the
 * manager under test
 */
protected abstract String managerClassName();
|
||||
|
||||
@After
|
||||
public void tearDownEnv() throws Throwable {
|
||||
closeables.forEach(closeables -> {
|
||||
try {
|
||||
closeables.close();
|
||||
} catch (Throwable t) {
|
||||
// silent here
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
protected DistributedPrimitiveManager createManagedDistributeManager() {
|
||||
return createManagedDistributeManager(stringStringMap -> {
|
||||
});
|
||||
}
|
||||
|
||||
protected DistributedPrimitiveManager createManagedDistributeManager(Consumer<? super Map<String, String>> defaultConfiguration) {
|
||||
try {
|
||||
final HashMap<String, String> config = new HashMap<>();
|
||||
configureManager(config);
|
||||
defaultConfiguration.accept(config);
|
||||
final DistributedPrimitiveManager manager = DistributedPrimitiveManager.newInstanceOf(managerClassName(), config);
|
||||
closeables.add(manager);
|
||||
return manager;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void managerReturnsSameLockIfNotClosed() throws ExecutionException, InterruptedException, TimeoutException {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
Assert.assertSame(manager.getDistributedLock("a"), manager.getDistributedLock("a"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void managerReturnsDifferentLocksIfClosed() throws ExecutionException, InterruptedException, TimeoutException {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
DistributedLock closedLock = manager.getDistributedLock("a");
|
||||
closedLock.close();
|
||||
Assert.assertNotSame(closedLock, manager.getDistributedLock("a"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void managerReturnsDifferentLocksOnRestart() throws ExecutionException, InterruptedException, TimeoutException {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
DistributedLock closedLock = manager.getDistributedLock("a");
|
||||
manager.stop();
|
||||
manager.start();
|
||||
Assert.assertNotSame(closedLock, manager.getDistributedLock("a"));
|
||||
}
|
||||
|
||||
@Test(expected = IllegalStateException.class)
|
||||
public void managerCannotGetLockIfNotStarted() throws ExecutionException, InterruptedException, TimeoutException {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.getDistributedLock("a");
|
||||
}
|
||||
|
||||
@Test(expected = NullPointerException.class)
|
||||
public void managerCannotGetLockWithNullLockId() throws ExecutionException, InterruptedException, TimeoutException {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
manager.getDistributedLock(null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void closingLockUnlockIt() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
DistributedLock closedLock = manager.getDistributedLock("a");
|
||||
Assert.assertTrue(closedLock.tryLock());
|
||||
closedLock.close();
|
||||
Assert.assertTrue(manager.getDistributedLock("a").tryLock());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void managerStopUnlockLocks() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
Assert.assertTrue(manager.getDistributedLock("a").tryLock());
|
||||
Assert.assertTrue(manager.getDistributedLock("b").tryLock());
|
||||
manager.stop();
|
||||
manager.start();
|
||||
Assert.assertFalse(manager.getDistributedLock("a").isHeldByCaller());
|
||||
Assert.assertFalse(manager.getDistributedLock("b").isHeldByCaller());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void acquireAndReleaseLock() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
DistributedLock lock = manager.getDistributedLock("a");
|
||||
Assert.assertFalse(lock.isHeldByCaller());
|
||||
Assert.assertTrue(lock.tryLock());
|
||||
Assert.assertTrue(lock.isHeldByCaller());
|
||||
lock.unlock();
|
||||
Assert.assertFalse(lock.isHeldByCaller());
|
||||
}
|
||||
|
||||
@Test(expected = IllegalStateException.class)
|
||||
public void cannotAcquireSameLockTwice() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
DistributedLock lock = manager.getDistributedLock("a");
|
||||
Assert.assertTrue(lock.tryLock());
|
||||
lock.tryLock();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void heldLockIsVisibleByDifferentManagers() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
DistributedPrimitiveManager ownerManager = createManagedDistributeManager();
|
||||
DistributedPrimitiveManager observerManager = createManagedDistributeManager();
|
||||
ownerManager.start();
|
||||
observerManager.start();
|
||||
Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock());
|
||||
Assert.assertTrue(ownerManager.getDistributedLock("a").isHeldByCaller());
|
||||
Assert.assertFalse(observerManager.getDistributedLock("a").isHeldByCaller());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void unlockedLockIsVisibleByDifferentManagers() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
DistributedPrimitiveManager ownerManager = createManagedDistributeManager();
|
||||
DistributedPrimitiveManager observerManager = createManagedDistributeManager();
|
||||
ownerManager.start();
|
||||
observerManager.start();
|
||||
Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock());
|
||||
ownerManager.getDistributedLock("a").unlock();
|
||||
Assert.assertFalse(observerManager.getDistributedLock("a").isHeldByCaller());
|
||||
Assert.assertFalse(ownerManager.getDistributedLock("a").isHeldByCaller());
|
||||
Assert.assertTrue(observerManager.getDistributedLock("a").tryLock());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cannotAcquireSameLockFromDifferentManagers() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
DistributedPrimitiveManager ownerManager = createManagedDistributeManager();
|
||||
DistributedPrimitiveManager notOwnerManager = createManagedDistributeManager();
|
||||
ownerManager.start();
|
||||
notOwnerManager.start();
|
||||
Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock());
|
||||
Assert.assertFalse(notOwnerManager.getDistributedLock("a").tryLock());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cannotUnlockFromNotOwnerManager() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
DistributedPrimitiveManager ownerManager = createManagedDistributeManager();
|
||||
DistributedPrimitiveManager notOwnerManager = createManagedDistributeManager();
|
||||
ownerManager.start();
|
||||
notOwnerManager.start();
|
||||
Assert.assertTrue(ownerManager.getDistributedLock("a").tryLock());
|
||||
notOwnerManager.getDistributedLock("a").unlock();
|
||||
Assert.assertFalse(notOwnerManager.getDistributedLock("a").isHeldByCaller());
|
||||
Assert.assertTrue(ownerManager.getDistributedLock("a").isHeldByCaller());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void timedTryLockSucceedWithShortTimeout() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
DistributedLock backgroundLock = manager.getDistributedLock("a");
|
||||
Assert.assertTrue(backgroundLock.tryLock(1, TimeUnit.NANOSECONDS));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void timedTryLockFailAfterTimeout() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
DistributedPrimitiveManager otherManager = createManagedDistributeManager();
|
||||
otherManager.start();
|
||||
Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
|
||||
final long start = System.nanoTime();
|
||||
final long timeoutSec = 1;
|
||||
Assert.assertFalse(manager.getDistributedLock("a").tryLock(timeoutSec, TimeUnit.SECONDS));
|
||||
final long elapsed = TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - start);
|
||||
assertThat(elapsed, greaterThanOrEqualTo(timeoutSec));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void timedTryLockSuccess() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
DistributedPrimitiveManager otherManager = createManagedDistributeManager();
|
||||
otherManager.start();
|
||||
Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
|
||||
DistributedLock backgroundLock = manager.getDistributedLock("a");
|
||||
CompletableFuture<Boolean> acquired = new CompletableFuture<>();
|
||||
CountDownLatch startedTry = new CountDownLatch(1);
|
||||
Thread tryLockThread = new Thread(() -> {
|
||||
startedTry.countDown();
|
||||
try {
|
||||
if (!backgroundLock.tryLock(Long.MAX_VALUE, TimeUnit.DAYS)) {
|
||||
acquired.complete(false);
|
||||
} else {
|
||||
acquired.complete(true);
|
||||
}
|
||||
} catch (Throwable e) {
|
||||
acquired.complete(false);
|
||||
}
|
||||
});
|
||||
tryLockThread.start();
|
||||
Assert.assertTrue(startedTry.await(10, TimeUnit.SECONDS));
|
||||
otherManager.getDistributedLock("a").unlock();
|
||||
Assert.assertTrue(acquired.get(4, TimeUnit.SECONDS));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void interruptStopTimedTryLock() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
DistributedPrimitiveManager otherManager = createManagedDistributeManager();
|
||||
otherManager.start();
|
||||
Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
|
||||
DistributedLock backgroundLock = manager.getDistributedLock("a");
|
||||
CompletableFuture<Boolean> interrupted = new CompletableFuture<>();
|
||||
CountDownLatch startedTry = new CountDownLatch(1);
|
||||
Thread tryLockThread = new Thread(() -> {
|
||||
startedTry.countDown();
|
||||
try {
|
||||
backgroundLock.tryLock(Long.MAX_VALUE, TimeUnit.DAYS);
|
||||
interrupted.complete(false);
|
||||
} catch (UnavailableStateException e) {
|
||||
interrupted.complete(false);
|
||||
} catch (InterruptedException e) {
|
||||
interrupted.complete(true);
|
||||
}
|
||||
});
|
||||
tryLockThread.start();
|
||||
Assert.assertTrue(startedTry.await(10, TimeUnit.SECONDS));
|
||||
// let background lock to perform some tries
|
||||
TimeUnit.SECONDS.sleep(1);
|
||||
tryLockThread.interrupt();
|
||||
Assert.assertTrue(interrupted.get(4, TimeUnit.SECONDS));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void lockAndMutableLongWithSameIdCanExistsTogether() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
final String id = "a";
|
||||
Assert.assertTrue(manager.getDistributedLock(id).tryLock());
|
||||
Assert.assertEquals(0, manager.getMutableLong(id).get());
|
||||
manager.getMutableLong(id).set(1);
|
||||
Assert.assertTrue(manager.getDistributedLock(id).isHeldByCaller());
|
||||
Assert.assertEquals(1, manager.getMutableLong(id).get());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.quorum.file;
|
||||
|
||||
import java.io.File;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.activemq.artemis.quorum.DistributedLockTest;
|
||||
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
|
||||
import org.junit.Before;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.TemporaryFolder;
|
||||
|
||||
public class FileDistributedLockTest extends DistributedLockTest {
|
||||
|
||||
@Rule
|
||||
public TemporaryFolder tmpFolder = new TemporaryFolder();
|
||||
|
||||
private File locksFolder;
|
||||
|
||||
@Before
|
||||
@Override
|
||||
public void setupEnv() throws Throwable {
|
||||
locksFolder = tmpFolder.newFolder("locks-folder");
|
||||
super.setupEnv();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void configureManager(Map<String, String> config) {
|
||||
config.put("locks-folder", locksFolder.toString());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String managerClassName() {
|
||||
return FileBasedPrimitiveManager.class.getName();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void reflectiveManagerCreation() throws Exception {
|
||||
DistributedPrimitiveManager.newInstanceOf(managerClassName(), Collections.singletonMap("locks-folder", locksFolder.toString()));
|
||||
}
|
||||
|
||||
@Test(expected = InvocationTargetException.class)
|
||||
public void reflectiveManagerCreationFailWithoutLocksFolder() throws Exception {
|
||||
DistributedPrimitiveManager.newInstanceOf(managerClassName(), Collections.emptyMap());
|
||||
}
|
||||
|
||||
@Test(expected = InvocationTargetException.class)
|
||||
public void reflectiveManagerCreationFailIfLocksFolderIsNotFolder() throws Exception {
|
||||
DistributedPrimitiveManager.newInstanceOf(managerClassName(), Collections.singletonMap("locks-folder", tmpFolder.newFile().toString()));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,364 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.quorum.zookeeper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.google.common.base.Predicates;
|
||||
import org.apache.activemq.artemis.quorum.DistributedLock;
|
||||
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
|
||||
import org.apache.activemq.artemis.quorum.UnavailableStateException;
|
||||
import org.apache.activemq.artemis.utils.Wait;
|
||||
import org.apache.curator.test.InstanceSpec;
|
||||
import org.apache.curator.test.TestingCluster;
|
||||
|
||||
import org.apache.activemq.artemis.quorum.DistributedLockTest;
|
||||
import org.apache.curator.test.TestingZooKeeperServer;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Assume;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.TemporaryFolder;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
|
||||
import static java.lang.Boolean.TRUE;
|
||||
import static org.hamcrest.Matchers.greaterThan;
|
||||
|
||||
@RunWith(value = Parameterized.class)
|
||||
public class CuratorDistributedLockTest extends DistributedLockTest {
|
||||
|
||||
private static final int BASE_SERVER_PORT = 6666;
|
||||
private static final int CONNECTION_MS = 2000;
|
||||
// Beware: the server tick must be small enough that to let the session to be correctly expired
|
||||
private static final int SESSION_MS = 6000;
|
||||
private static final int SERVER_TICK_MS = 2000;
|
||||
private static final int RETRIES_MS = 100;
|
||||
private static final int RETRIES = 1;
|
||||
|
||||
@Parameterized.Parameter
|
||||
public int nodes;
|
||||
@Rule
|
||||
public TemporaryFolder tmpFolder = new TemporaryFolder();
|
||||
private TestingCluster testingServer;
|
||||
private InstanceSpec[] clusterSpecs;
|
||||
private String connectString;
|
||||
|
||||
@Parameterized.Parameters(name = "nodes={0}")
|
||||
public static Iterable<Object[]> getTestParameters() {
|
||||
return Arrays.asList(new Object[][]{{3}, {5}});
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setupEnv() throws Throwable {
|
||||
clusterSpecs = new InstanceSpec[nodes];
|
||||
for (int i = 0; i < nodes; i++) {
|
||||
clusterSpecs[i] = new InstanceSpec(tmpFolder.newFolder(), BASE_SERVER_PORT + i, -1, -1, true, -1, SERVER_TICK_MS, -1);
|
||||
}
|
||||
testingServer = new TestingCluster(clusterSpecs);
|
||||
testingServer.start();
|
||||
// start waits for quorumPeer!=null but not that it has started...
|
||||
Wait.waitFor(this::ensembleHasLeader);
|
||||
connectString = testingServer.getConnectString();
|
||||
super.setupEnv();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDownEnv() throws Throwable {
|
||||
super.tearDownEnv();
|
||||
testingServer.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void configureManager(Map<String, String> config) {
|
||||
config.put("connect-string", connectString);
|
||||
config.put("session-ms", Integer.toString(SESSION_MS));
|
||||
config.put("connection-ms", Integer.toString(CONNECTION_MS));
|
||||
config.put("retries", Integer.toString(RETRIES));
|
||||
config.put("retries-ms", Integer.toString(RETRIES_MS));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String managerClassName() {
|
||||
return CuratorDistributedPrimitiveManager.class.getName();
|
||||
}
|
||||
|
||||
@Test(expected = RuntimeException.class)
|
||||
public void cannotCreateManagerWithNotValidParameterNames() {
|
||||
final DistributedPrimitiveManager manager = createManagedDistributeManager(config -> config.put("_", "_"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void canAcquireLocksFromDifferentNamespace() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
final DistributedPrimitiveManager manager1 = createManagedDistributeManager(config -> config.put("namespace", "1"));
|
||||
manager1.start();
|
||||
final DistributedPrimitiveManager manager2 = createManagedDistributeManager(config -> config.put("namespace", "2"));
|
||||
manager2.start();
|
||||
Assert.assertTrue(manager1.getDistributedLock("a").tryLock());
|
||||
Assert.assertTrue(manager2.getDistributedLock("a").tryLock());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cannotStartManagerWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException {
|
||||
final DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
testingServer.close();
|
||||
Assert.assertFalse(manager.start(1, TimeUnit.SECONDS));
|
||||
}
|
||||
|
||||
@Test(expected = UnavailableStateException.class)
|
||||
public void cannotAcquireLockWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
final DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
final DistributedLock lock = manager.getDistributedLock("a");
|
||||
final CountDownLatch notAvailable = new CountDownLatch(1);
|
||||
final DistributedLock.UnavailableLockListener listener = notAvailable::countDown;
|
||||
lock.addListener(listener);
|
||||
testingServer.close();
|
||||
Assert.assertTrue(notAvailable.await(30, TimeUnit.SECONDS));
|
||||
lock.tryLock();
|
||||
}
|
||||
|
||||
@Test(expected = UnavailableStateException.class)
|
||||
public void cannotTryLockWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
final DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
final DistributedLock lock = manager.getDistributedLock("a");
|
||||
testingServer.close();
|
||||
lock.tryLock();
|
||||
}
|
||||
|
||||
@Test(expected = UnavailableStateException.class)
|
||||
public void cannotCheckLockStatusWithDisconnectedServer() throws IOException, ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
final DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
final DistributedLock lock = manager.getDistributedLock("a");
|
||||
Assert.assertFalse(lock.isHeldByCaller());
|
||||
Assert.assertTrue(lock.tryLock());
|
||||
testingServer.close();
|
||||
lock.isHeldByCaller();
|
||||
}
|
||||
|
||||
@Test(expected = UnavailableStateException.class)
|
||||
public void looseLockAfterServerStop() throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException, IOException {
|
||||
final DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
final DistributedLock lock = manager.getDistributedLock("a");
|
||||
Assert.assertTrue(lock.tryLock());
|
||||
Assert.assertTrue(lock.isHeldByCaller());
|
||||
final CountDownLatch notAvailable = new CountDownLatch(1);
|
||||
final DistributedLock.UnavailableLockListener listener = notAvailable::countDown;
|
||||
lock.addListener(listener);
|
||||
Assert.assertEquals(1, notAvailable.getCount());
|
||||
testingServer.close();
|
||||
Assert.assertTrue(notAvailable.await(30, TimeUnit.SECONDS));
|
||||
lock.isHeldByCaller();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void canAcquireLockOnMajorityRestart() throws Exception {
|
||||
final DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
final DistributedLock lock = manager.getDistributedLock("a");
|
||||
Assert.assertTrue(lock.tryLock());
|
||||
Assert.assertTrue(lock.isHeldByCaller());
|
||||
final CountDownLatch notAvailable = new CountDownLatch(1);
|
||||
final DistributedLock.UnavailableLockListener listener = notAvailable::countDown;
|
||||
lock.addListener(listener);
|
||||
Assert.assertEquals(1, notAvailable.getCount());
|
||||
testingServer.stop();
|
||||
notAvailable.await();
|
||||
manager.stop();
|
||||
restartMajorityNodes(true);
|
||||
final DistributedPrimitiveManager otherManager = createManagedDistributeManager();
|
||||
otherManager.start();
|
||||
// await more then the expected value, that depends by how curator session expiration is configured
|
||||
TimeUnit.MILLISECONDS.sleep(SESSION_MS + SERVER_TICK_MS);
|
||||
Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cannotStartManagerWithoutQuorum() throws Exception {
|
||||
Assume.assumeThat(nodes, greaterThan(1));
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
stopMajorityNotLeaderNodes(true);
|
||||
Assert.assertFalse(manager.start(2, TimeUnit.SECONDS));
|
||||
Assert.assertFalse(manager.isStarted());
|
||||
}
|
||||
|
||||
@Test(expected = UnavailableStateException.class)
|
||||
public void cannotAcquireLockWithoutQuorum() throws Exception {
|
||||
Assume.assumeThat(nodes, greaterThan(1));
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
stopMajorityNotLeaderNodes(true);
|
||||
DistributedLock lock = manager.getDistributedLock("a");
|
||||
lock.tryLock();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cannotCheckLockWithoutQuorum() throws Exception {
|
||||
Assume.assumeThat(nodes, greaterThan(1));
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
stopMajorityNotLeaderNodes(true);
|
||||
DistributedLock lock = manager.getDistributedLock("a");
|
||||
final boolean held;
|
||||
try {
|
||||
held = lock.isHeldByCaller();
|
||||
} catch (UnavailableStateException expected) {
|
||||
return;
|
||||
}
|
||||
Assert.assertFalse(held);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void canGetLockWithoutQuorum() throws Exception {
|
||||
Assume.assumeThat(nodes, greaterThan(1));
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
stopMajorityNotLeaderNodes(true);
|
||||
DistributedLock lock = manager.getDistributedLock("a");
|
||||
Assert.assertNotNull(lock);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void notifiedAsUnavailableWhileLoosingQuorum() throws Exception {
|
||||
Assume.assumeThat(nodes, greaterThan(1));
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
DistributedLock lock = manager.getDistributedLock("a");
|
||||
CountDownLatch unavailable = new CountDownLatch(1);
|
||||
lock.addListener(unavailable::countDown);
|
||||
stopMajorityNotLeaderNodes(true);
|
||||
Assert.assertTrue(unavailable.await(SESSION_MS + SERVER_TICK_MS, TimeUnit.MILLISECONDS));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void beNotifiedOnce() throws Exception {
|
||||
Assume.assumeThat(nodes, greaterThan(1));
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
DistributedLock lock = manager.getDistributedLock("a");
|
||||
final AtomicInteger unavailableManager = new AtomicInteger(0);
|
||||
final AtomicInteger unavailableLock = new AtomicInteger(0);
|
||||
manager.addUnavailableManagerListener(unavailableManager::incrementAndGet);
|
||||
lock.addListener(unavailableLock::incrementAndGet);
|
||||
stopMajorityNotLeaderNodes(true);
|
||||
TimeUnit.MILLISECONDS.sleep(SESSION_MS + SERVER_TICK_MS + CONNECTION_MS);
|
||||
Assert.assertEquals(1, unavailableLock.get());
|
||||
Assert.assertEquals(1, unavailableManager.get());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void beNotifiedOfUnavailabilityWhileBlockedOnTimedLock() throws Exception {
|
||||
Assume.assumeThat(nodes, greaterThan(1));
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
DistributedLock lock = manager.getDistributedLock("a");
|
||||
final AtomicInteger unavailableManager = new AtomicInteger(0);
|
||||
final AtomicInteger unavailableLock = new AtomicInteger(0);
|
||||
manager.addUnavailableManagerListener(unavailableManager::incrementAndGet);
|
||||
lock.addListener(unavailableLock::incrementAndGet);
|
||||
final DistributedPrimitiveManager otherManager = createManagedDistributeManager();
|
||||
otherManager.start();
|
||||
Assert.assertTrue(otherManager.getDistributedLock("a").tryLock());
|
||||
final CountDownLatch startedTimedLock = new CountDownLatch(1);
|
||||
final AtomicReference<Boolean> unavailableTimedLock = new AtomicReference<>(null);
|
||||
Thread timedLock = new Thread(() -> {
|
||||
startedTimedLock.countDown();
|
||||
try {
|
||||
lock.tryLock(Long.MAX_VALUE, TimeUnit.DAYS);
|
||||
unavailableTimedLock.set(false);
|
||||
} catch (UnavailableStateException e) {
|
||||
unavailableTimedLock.set(true);
|
||||
} catch (InterruptedException e) {
|
||||
unavailableTimedLock.set(false);
|
||||
}
|
||||
});
|
||||
timedLock.start();
|
||||
Assert.assertTrue(startedTimedLock.await(10, TimeUnit.SECONDS));
|
||||
TimeUnit.SECONDS.sleep(1);
|
||||
stopMajorityNotLeaderNodes(true);
|
||||
TimeUnit.MILLISECONDS.sleep(SESSION_MS + CONNECTION_MS);
|
||||
Wait.waitFor(() -> unavailableLock.get() > 0, SERVER_TICK_MS);
|
||||
Assert.assertEquals(1, unavailableManager.get());
|
||||
Assert.assertEquals(TRUE, unavailableTimedLock.get());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void beNotifiedOfAlreadyUnavailableManagerAfterAddingListener() throws Exception {
|
||||
DistributedPrimitiveManager manager = createManagedDistributeManager();
|
||||
manager.start();
|
||||
final AtomicBoolean unavailable = new AtomicBoolean(false);
|
||||
DistributedPrimitiveManager.UnavailableManagerListener managerListener = () -> {
|
||||
unavailable.set(true);
|
||||
};
|
||||
manager.addUnavailableManagerListener(managerListener);
|
||||
Assert.assertFalse(unavailable.get());
|
||||
stopMajorityNotLeaderNodes(true);
|
||||
Wait.waitFor(unavailable::get);
|
||||
manager.removeUnavailableManagerListener(managerListener);
|
||||
final AtomicInteger unavailableOnRegister = new AtomicInteger();
|
||||
manager.addUnavailableManagerListener(unavailableOnRegister::incrementAndGet);
|
||||
Assert.assertEquals(1, unavailableOnRegister.get());
|
||||
unavailableOnRegister.set(0);
|
||||
try (DistributedLock lock = manager.getDistributedLock("a")) {
|
||||
lock.addListener(unavailableOnRegister::incrementAndGet);
|
||||
Assert.assertEquals(1, unavailableOnRegister.get());
|
||||
}
|
||||
}
|
||||
|
||||
private boolean ensembleHasLeader() {
|
||||
return testingServer.getServers().stream().filter(CuratorDistributedLockTest::isLeader).count() != 0;
|
||||
}
|
||||
|
||||
private static boolean isLeader(TestingZooKeeperServer server) {
|
||||
long leaderId = server.getQuorumPeer().getLeaderId();
|
||||
long id = server.getQuorumPeer().getId();
|
||||
return id == leaderId;
|
||||
}
|
||||
|
||||
private void stopMajorityNotLeaderNodes(boolean fromLast) throws Exception {
|
||||
List<TestingZooKeeperServer> followers = testingServer.getServers().stream().filter(Predicates.not(CuratorDistributedLockTest::isLeader)).collect(Collectors.toList());
|
||||
final int quorum = (nodes / 2) + 1;
|
||||
for (int i = 0; i < quorum; i++) {
|
||||
final int nodeIndex = fromLast ? (followers.size() - 1) - i : i;
|
||||
followers.get(nodeIndex).stop();
|
||||
}
|
||||
}
|
||||
|
||||
private void restartMajorityNodes(boolean startFromLast) throws Exception {
|
||||
final int quorum = (nodes / 2) + 1;
|
||||
for (int i = 0; i < quorum; i++) {
|
||||
final int nodeIndex = startFromLast ? (nodes - 1) - i : i;
|
||||
if (!testingServer.restartServer(clusterSpecs[nodeIndex])) {
|
||||
throw new IllegalStateException("errored while restarting " + clusterSpecs[nodeIndex]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,140 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.quorum.zookeeper;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
|
||||
import org.apache.curator.framework.CuratorFramework;
|
||||
import org.apache.curator.test.InstanceSpec;
|
||||
import org.apache.curator.test.TestingCluster;
|
||||
import org.apache.curator.utils.ZKPaths;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.apache.zookeeper.ZooKeeper;
|
||||
import org.apache.zookeeper.data.Stat;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.TemporaryFolder;
|
||||
|
||||
public class CuratorDistributedPrimitiveManagerTest {
|
||||
|
||||
private final ArrayList<AutoCloseable> autoCloseables = new ArrayList<>();
|
||||
|
||||
private static final int BASE_SERVER_PORT = 6666;
|
||||
private static final int CONNECTION_MS = 2000;
|
||||
// Beware: the server tick must be small enough that to let the session to be correctly expired
|
||||
private static final int SESSION_MS = 6000;
|
||||
private static final int SERVER_TICK_MS = 2000;
|
||||
private static final int RETRIES_MS = 100;
|
||||
private static final int RETRIES = 1;
|
||||
|
||||
public int nodes = 1;
|
||||
@Rule
|
||||
public TemporaryFolder tmpFolder = new TemporaryFolder();
|
||||
private TestingCluster testingServer;
|
||||
private String connectString;
|
||||
|
||||
|
||||
@Before
|
||||
public void setupEnv() throws Throwable {
|
||||
InstanceSpec[] clusterSpecs = new InstanceSpec[nodes];
|
||||
for (int i = 0; i < nodes; i++) {
|
||||
clusterSpecs[i] = new InstanceSpec(tmpFolder.newFolder(), BASE_SERVER_PORT + i, -1, -1, true, -1, SERVER_TICK_MS, -1);
|
||||
}
|
||||
testingServer = new TestingCluster(clusterSpecs);
|
||||
testingServer.start();
|
||||
connectString = testingServer.getConnectString();
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDownEnv() throws Throwable {
|
||||
autoCloseables.forEach(closeables -> {
|
||||
try {
|
||||
closeables.close();
|
||||
} catch (Throwable t) {
|
||||
// silent here
|
||||
}
|
||||
});
|
||||
testingServer.close();
|
||||
}
|
||||
|
||||
protected void configureManager(Map<String, String> config) {
|
||||
config.put("connect-string", connectString);
|
||||
config.put("session-ms", Integer.toString(SESSION_MS));
|
||||
config.put("connection-ms", Integer.toString(CONNECTION_MS));
|
||||
config.put("retries", Integer.toString(RETRIES));
|
||||
config.put("retries-ms", Integer.toString(RETRIES_MS));
|
||||
}
|
||||
|
||||
protected DistributedPrimitiveManager createManagedDistributeManager(Consumer<? super Map<String, String>> defaultConfiguration) {
|
||||
try {
|
||||
final HashMap<String, String> config = new HashMap<>();
|
||||
configureManager(config);
|
||||
defaultConfiguration.accept(config);
|
||||
final DistributedPrimitiveManager manager = DistributedPrimitiveManager.newInstanceOf(managerClassName(), config);
|
||||
autoCloseables.add(manager);
|
||||
return manager;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
protected String managerClassName() {
|
||||
return CuratorDistributedPrimitiveManager.class.getName();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void verifyLayoutInZK() throws Exception {
|
||||
final DistributedPrimitiveManager manager = createManagedDistributeManager(config -> config.put("namespace", "activemq-artemis"));
|
||||
manager.start();
|
||||
Assert.assertTrue(manager.getDistributedLock("journal-identity-000-111").tryLock());
|
||||
|
||||
Assert.assertTrue(manager.getMutableLong("journal-identity-000-111").compareAndSet(0, 1));
|
||||
|
||||
CuratorFramework curatorFramework = ((CuratorDistributedPrimitiveManager)manager).getCurator();
|
||||
List<String> entries = new LinkedList<>();
|
||||
dumpZK(curatorFramework.getZookeeperClient().getZooKeeper(), "/", entries);
|
||||
|
||||
Assert.assertTrue(entries.get(2).contains("activation-sequence"));
|
||||
|
||||
for (String entry: entries) {
|
||||
System.err.println("ZK: " + entry);
|
||||
}
|
||||
}
|
||||
|
||||
private void dumpZK(ZooKeeper zooKeeper, String path, List<String> entries) throws InterruptedException, KeeperException {
|
||||
List<String> children = ZKPaths.getSortedChildren(zooKeeper,path);
|
||||
for (String s: children) {
|
||||
if (!s.equals("zookeeper")) {
|
||||
String qualifiedPath = (path.endsWith("/") ? path : path + "/") + s;
|
||||
Stat stat = new Stat();
|
||||
zooKeeper.getData(qualifiedPath, null, stat);
|
||||
entries.add(qualifiedPath + ", data-len:" + stat.getDataLength() + ", ephemeral: " + (stat.getEphemeralOwner() != 0));
|
||||
dumpZK(zooKeeper, qualifiedPath, entries);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -85,6 +85,11 @@
|
|||
<artifactId>artemis-core-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.activemq</groupId>
|
||||
<artifactId>artemis-quorum-api</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.activemq</groupId>
|
||||
<artifactId>activemq-artemis-native</artifactId>
|
||||
|
|
|
@ -22,6 +22,8 @@ import java.util.List;
|
|||
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
|
||||
import org.apache.activemq.artemis.api.core.ActiveMQIllegalStateException;
|
||||
import org.apache.activemq.artemis.api.core.TransportConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ColocatedPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.LiveOnlyPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
|
||||
|
@ -31,6 +33,8 @@ import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfigur
|
|||
import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServer;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.BackupPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ColocatedPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy;
|
||||
|
@ -79,6 +83,11 @@ public final class ConfigurationUtils {
|
|||
ReplicaPolicyConfiguration pc = (ReplicaPolicyConfiguration) conf;
|
||||
return new ReplicaPolicy(pc.getClusterName(), pc.getMaxSavedReplicatedJournalsSize(), pc.getGroupName(), pc.isRestartBackup(), pc.isAllowFailBack(), pc.getInitialReplicationSyncTimeout(), getScaleDownPolicy(pc.getScaleDownConfiguration()), server.getNetworkHealthCheck(), pc.getVoteOnReplicationFailure(), pc.getQuorumSize(), pc.getVoteRetries(), pc.getVoteRetryWait(), pc.getQuorumVoteWait(), pc.getRetryReplicationWait());
|
||||
}
|
||||
case PRIMARY:
|
||||
return ReplicationPrimaryPolicy.with((ReplicationPrimaryPolicyConfiguration) conf);
|
||||
case BACKUP: {
|
||||
return ReplicationBackupPolicy.with((ReplicationBackupPolicyConfiguration) conf);
|
||||
}
|
||||
case SHARED_STORE_MASTER: {
|
||||
SharedStoreMasterPolicyConfiguration pc = (SharedStoreMasterPolicyConfiguration) conf;
|
||||
return new SharedStoreMasterPolicy(pc.isFailoverOnServerShutdown(), pc.isWaitForActivation());
|
||||
|
|
|
@ -26,7 +26,9 @@ public interface HAPolicyConfiguration extends Serializable {
|
|||
REPLICA("Replica"),
|
||||
SHARED_STORE_MASTER("Shared Store Master"),
|
||||
SHARED_STORE_SLAVE("Shared Store Slave"),
|
||||
COLOCATED("Colocated");
|
||||
COLOCATED("Colocated"),
|
||||
PRIMARY("Primary"),
|
||||
BACKUP("Backup");
|
||||
|
||||
private String name;
|
||||
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.core.config.ha;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * Immutable pair of a manager class name and the string properties that go with it.
 * <p>
 * The properties map is stored and returned by reference; no copy is made.
 */
public class DistributedPrimitiveManagerConfiguration implements Serializable {

   private final String className;
   private final Map<String, String> properties;

   /**
    * @param className  name of the manager class, kept as given
    * @param properties properties of the manager, kept as given (not copied)
    */
   public DistributedPrimitiveManagerConfiguration(String className, Map<String, String> properties) {
      this.properties = properties;
      this.className = className;
   }

   /**
    * @return the class name passed at construction time
    */
   public String getClassName() {
      return this.className;
   }

   /**
    * @return the very same map instance passed at construction time
    */
   public Map<String, String> getProperties() {
      return this.properties;
   }
}
|
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.core.config.ha;
|
||||
|
||||
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
|
||||
|
||||
public class ReplicationBackupPolicyConfiguration implements HAPolicyConfiguration {
|
||||
|
||||
private String clusterName = null;
|
||||
|
||||
private int maxSavedReplicatedJournalsSize = ActiveMQDefaultConfiguration.getDefaultMaxSavedReplicatedJournalsSize();
|
||||
|
||||
private String groupName = null;
|
||||
|
||||
/*
|
||||
* used in the replicated policy after failover
|
||||
* */
|
||||
private boolean allowFailBack = false;
|
||||
|
||||
private long initialReplicationSyncTimeout = ActiveMQDefaultConfiguration.getDefaultInitialReplicationSyncTimeout();
|
||||
|
||||
private long retryReplicationWait = ActiveMQDefaultConfiguration.getDefaultRetryReplicationWait();
|
||||
|
||||
private DistributedPrimitiveManagerConfiguration distributedManagerConfiguration = null;
|
||||
|
||||
public static final ReplicationBackupPolicyConfiguration withDefault() {
|
||||
return new ReplicationBackupPolicyConfiguration();
|
||||
}
|
||||
|
||||
private ReplicationBackupPolicyConfiguration() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public HAPolicyConfiguration.TYPE getType() {
|
||||
return TYPE.BACKUP;
|
||||
}
|
||||
|
||||
public String getClusterName() {
|
||||
return clusterName;
|
||||
}
|
||||
|
||||
public ReplicationBackupPolicyConfiguration setClusterName(String clusterName) {
|
||||
this.clusterName = clusterName;
|
||||
return this;
|
||||
}
|
||||
|
||||
public int getMaxSavedReplicatedJournalsSize() {
|
||||
return maxSavedReplicatedJournalsSize;
|
||||
}
|
||||
|
||||
public ReplicationBackupPolicyConfiguration setMaxSavedReplicatedJournalsSize(int maxSavedReplicatedJournalsSize) {
|
||||
this.maxSavedReplicatedJournalsSize = maxSavedReplicatedJournalsSize;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getGroupName() {
|
||||
return groupName;
|
||||
}
|
||||
|
||||
public ReplicationBackupPolicyConfiguration setGroupName(String groupName) {
|
||||
this.groupName = groupName;
|
||||
return this;
|
||||
}
|
||||
|
||||
public boolean isAllowFailBack() {
|
||||
return allowFailBack;
|
||||
}
|
||||
|
||||
public ReplicationBackupPolicyConfiguration setAllowFailBack(boolean allowFailBack) {
|
||||
this.allowFailBack = allowFailBack;
|
||||
return this;
|
||||
}
|
||||
|
||||
public long getInitialReplicationSyncTimeout() {
|
||||
return initialReplicationSyncTimeout;
|
||||
}
|
||||
|
||||
public ReplicationBackupPolicyConfiguration setInitialReplicationSyncTimeout(long initialReplicationSyncTimeout) {
|
||||
this.initialReplicationSyncTimeout = initialReplicationSyncTimeout;
|
||||
return this;
|
||||
}
|
||||
|
||||
public long getRetryReplicationWait() {
|
||||
return retryReplicationWait;
|
||||
}
|
||||
|
||||
public ReplicationBackupPolicyConfiguration setRetryReplicationWait(long retryReplicationWait) {
|
||||
this.retryReplicationWait = retryReplicationWait;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ReplicationBackupPolicyConfiguration setDistributedManagerConfiguration(DistributedPrimitiveManagerConfiguration configuration) {
|
||||
this.distributedManagerConfiguration = configuration;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DistributedPrimitiveManagerConfiguration getDistributedManagerConfiguration() {
|
||||
return distributedManagerConfiguration;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,114 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.core.config.ha;
|
||||
|
||||
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
|
||||
|
||||
public class ReplicationPrimaryPolicyConfiguration implements HAPolicyConfiguration {
|
||||
|
||||
private String groupName = null;
|
||||
|
||||
private String clusterName = null;
|
||||
|
||||
private long initialReplicationSyncTimeout = ActiveMQDefaultConfiguration.getDefaultInitialReplicationSyncTimeout();
|
||||
|
||||
private Long retryReplicationWait = ActiveMQDefaultConfiguration.getDefaultRetryReplicationWait();
|
||||
|
||||
private DistributedPrimitiveManagerConfiguration distributedManagerConfiguration = null;
|
||||
|
||||
private String coordinationId = null;
|
||||
|
||||
public static ReplicationPrimaryPolicyConfiguration withDefault() {
|
||||
return new ReplicationPrimaryPolicyConfiguration();
|
||||
}
|
||||
|
||||
private ReplicationPrimaryPolicyConfiguration() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public TYPE getType() {
|
||||
return TYPE.PRIMARY;
|
||||
}
|
||||
|
||||
public String getGroupName() {
|
||||
return groupName;
|
||||
}
|
||||
|
||||
public ReplicationPrimaryPolicyConfiguration setGroupName(String groupName) {
|
||||
this.groupName = groupName;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getClusterName() {
|
||||
return clusterName;
|
||||
}
|
||||
|
||||
public ReplicationPrimaryPolicyConfiguration setClusterName(String clusterName) {
|
||||
this.clusterName = clusterName;
|
||||
return this;
|
||||
}
|
||||
|
||||
public long getInitialReplicationSyncTimeout() {
|
||||
return initialReplicationSyncTimeout;
|
||||
}
|
||||
|
||||
public ReplicationPrimaryPolicyConfiguration setInitialReplicationSyncTimeout(long initialReplicationSyncTimeout) {
|
||||
this.initialReplicationSyncTimeout = initialReplicationSyncTimeout;
|
||||
return this;
|
||||
}
|
||||
|
||||
public void setRetryReplicationWait(Long retryReplicationWait) {
|
||||
this.retryReplicationWait = retryReplicationWait;
|
||||
}
|
||||
|
||||
public Long getRetryReplicationWait() {
|
||||
return retryReplicationWait;
|
||||
}
|
||||
|
||||
public ReplicationPrimaryPolicyConfiguration setDistributedManagerConfiguration(DistributedPrimitiveManagerConfiguration configuration) {
|
||||
this.distributedManagerConfiguration = configuration;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DistributedPrimitiveManagerConfiguration getDistributedManagerConfiguration() {
|
||||
return distributedManagerConfiguration;
|
||||
}
|
||||
|
||||
public String getCoordinationId() {
|
||||
return coordinationId;
|
||||
}
|
||||
|
||||
public void setCoordinationId(String newCoordinationId) {
|
||||
if (newCoordinationId == null) {
|
||||
return;
|
||||
}
|
||||
final int len = newCoordinationId.length();
|
||||
if (len >= 16) {
|
||||
this.coordinationId = newCoordinationId.substring(0, 16);
|
||||
} else if (len % 2 != 0) {
|
||||
// must be even for conversion to uuid, extend to next even
|
||||
this.coordinationId = newCoordinationId + "+";
|
||||
} else if (len > 0 ) {
|
||||
// run with it
|
||||
this.coordinationId = newCoordinationId;
|
||||
}
|
||||
if (this.coordinationId != null) {
|
||||
this.coordinationId = this.coordinationId.replace('-', '.');
|
||||
}
|
||||
}
|
||||
}
|
|
@ -69,7 +69,10 @@ import org.apache.activemq.artemis.core.config.federation.FederationQueuePolicyC
|
|||
import org.apache.activemq.artemis.core.config.federation.FederationStreamConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.federation.FederationTransformerConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.federation.FederationUpstreamConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ColocatedPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.LiveOnlyPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
|
||||
|
@ -1607,6 +1610,16 @@ public final class FileConfigurationParser extends XMLConfigurationUtil {
|
|||
Element colocatedNode = (Element) colocatedNodeList.item(0);
|
||||
mainConfig.setHAPolicyConfiguration(createColocatedHaPolicy(colocatedNode, true));
|
||||
}
|
||||
NodeList primaryNodeList = e.getElementsByTagName("primary");
|
||||
if (primaryNodeList.getLength() > 0) {
|
||||
Element primaryNode = (Element) primaryNodeList.item(0);
|
||||
mainConfig.setHAPolicyConfiguration(createReplicationPrimaryHaPolicy(primaryNode, mainConfig));
|
||||
}
|
||||
NodeList backupNodeList = e.getElementsByTagName("backup");
|
||||
if (backupNodeList.getLength() > 0) {
|
||||
Element backupNode = (Element) backupNodeList.item(0);
|
||||
mainConfig.setHAPolicyConfiguration(createReplicationBackupHaPolicy(backupNode, mainConfig));
|
||||
}
|
||||
} else if (haNode.getTagName().equals("shared-store")) {
|
||||
NodeList masterNodeList = e.getElementsByTagName("master");
|
||||
if (masterNodeList.getLength() > 0) {
|
||||
|
@ -1699,6 +1712,67 @@ public final class FileConfigurationParser extends XMLConfigurationUtil {
|
|||
return configuration;
|
||||
}
|
||||
|
||||
private ReplicationPrimaryPolicyConfiguration createReplicationPrimaryHaPolicy(Element policyNode, Configuration config) {
|
||||
ReplicationPrimaryPolicyConfiguration configuration = ReplicationPrimaryPolicyConfiguration.withDefault();
|
||||
|
||||
configuration.setGroupName(getString(policyNode, "group-name", configuration.getGroupName(), Validators.NO_CHECK));
|
||||
|
||||
configuration.setClusterName(getString(policyNode, "cluster-name", configuration.getClusterName(), Validators.NO_CHECK));
|
||||
|
||||
configuration.setInitialReplicationSyncTimeout(getLong(policyNode, "initial-replication-sync-timeout", configuration.getInitialReplicationSyncTimeout(), Validators.GT_ZERO));
|
||||
|
||||
configuration.setRetryReplicationWait(getLong(policyNode, "retry-replication-wait", configuration.getRetryReplicationWait(), Validators.GT_ZERO));
|
||||
|
||||
configuration.setDistributedManagerConfiguration(createDistributedPrimitiveManagerConfiguration(policyNode, config));
|
||||
|
||||
configuration.setCoordinationId(getString(policyNode, "coordination-id", configuration.getCoordinationId(), Validators.NOT_NULL_OR_EMPTY));
|
||||
|
||||
return configuration;
|
||||
}
|
||||
|
||||
private ReplicationBackupPolicyConfiguration createReplicationBackupHaPolicy(Element policyNode, Configuration config) {
|
||||
|
||||
ReplicationBackupPolicyConfiguration configuration = ReplicationBackupPolicyConfiguration.withDefault();
|
||||
|
||||
configuration.setGroupName(getString(policyNode, "group-name", configuration.getGroupName(), Validators.NO_CHECK));
|
||||
|
||||
configuration.setAllowFailBack(getBoolean(policyNode, "allow-failback", configuration.isAllowFailBack()));
|
||||
|
||||
configuration.setInitialReplicationSyncTimeout(getLong(policyNode, "initial-replication-sync-timeout", configuration.getInitialReplicationSyncTimeout(), Validators.GT_ZERO));
|
||||
|
||||
configuration.setClusterName(getString(policyNode, "cluster-name", configuration.getClusterName(), Validators.NO_CHECK));
|
||||
|
||||
configuration.setMaxSavedReplicatedJournalsSize(getInteger(policyNode, "max-saved-replicated-journals-size", configuration.getMaxSavedReplicatedJournalsSize(), Validators.MINUS_ONE_OR_GE_ZERO));
|
||||
|
||||
configuration.setRetryReplicationWait(getLong(policyNode, "retry-replication-wait", configuration.getRetryReplicationWait(), Validators.GT_ZERO));
|
||||
|
||||
configuration.setDistributedManagerConfiguration(createDistributedPrimitiveManagerConfiguration(policyNode, config));
|
||||
|
||||
return configuration;
|
||||
}
|
||||
|
||||
private DistributedPrimitiveManagerConfiguration createDistributedPrimitiveManagerConfiguration(Element policyNode, Configuration config) {
|
||||
final Element managerNode = (Element) policyNode.getElementsByTagName("manager").item(0);
|
||||
final String className = getString(managerNode, "class-name",
|
||||
ActiveMQDefaultConfiguration.getDefaultDistributedPrimitiveManagerClassName(),
|
||||
Validators.NO_CHECK);
|
||||
final Map<String, String> properties;
|
||||
if (parameterExists(managerNode, "properties")) {
|
||||
final NodeList propertyNodeList = managerNode.getElementsByTagName("property");
|
||||
final int propertiesCount = propertyNodeList.getLength();
|
||||
properties = new HashMap<>(propertiesCount);
|
||||
for (int i = 0; i < propertiesCount; i++) {
|
||||
final Element propertyNode = (Element) propertyNodeList.item(i);
|
||||
final String propertyName = propertyNode.getAttributeNode("key").getValue();
|
||||
final String propertyValue = propertyNode.getAttributeNode("value").getValue();
|
||||
properties.put(propertyName, propertyValue);
|
||||
}
|
||||
} else {
|
||||
properties = new HashMap<>(1);
|
||||
}
|
||||
return new DistributedPrimitiveManagerConfiguration(className, properties);
|
||||
}
|
||||
|
||||
private SharedStoreMasterPolicyConfiguration createSharedStoreMasterHaPolicy(Element policyNode) {
|
||||
SharedStoreMasterPolicyConfiguration configuration = new SharedStoreMasterPolicyConfiguration();
|
||||
|
||||
|
|
|
@ -4204,6 +4204,17 @@ public class ActiveMQServerControlImpl extends AbstractControl implements Active
|
|||
return server.getNodeID() == null ? null : server.getNodeID().toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getActivationSequence() {
|
||||
if (AuditLogger.isBaseLoggingEnabled()) {
|
||||
AuditLogger.getActivationSequence(this.server);
|
||||
}
|
||||
if (server.getNodeManager() != null) {
|
||||
return server.getNodeManager().getNodeActivationSequence();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getManagementNotificationAddress() {
|
||||
if (AuditLogger.isBaseLoggingEnabled()) {
|
||||
|
|
|
@ -43,7 +43,8 @@ public class ReplicationStartSyncMessage extends PacketImpl {
|
|||
public enum SyncDataType {
|
||||
JournalBindings(AbstractJournalStorageManager.JournalContent.BINDINGS.typeByte),
|
||||
JournalMessages(AbstractJournalStorageManager.JournalContent.MESSAGES.typeByte),
|
||||
LargeMessages((byte) 2);
|
||||
LargeMessages((byte) 2),
|
||||
ActivationSequence((byte) 3);
|
||||
|
||||
private byte code;
|
||||
|
||||
|
@ -62,6 +63,9 @@ public class ReplicationStartSyncMessage extends PacketImpl {
|
|||
return JournalMessages;
|
||||
if (code == LargeMessages.code)
|
||||
return LargeMessages;
|
||||
if (code == ActivationSequence.code)
|
||||
return ActivationSequence;
|
||||
|
||||
throw new InvalidParameterException("invalid byte: " + code);
|
||||
}
|
||||
}
|
||||
|
@ -80,6 +84,14 @@ public class ReplicationStartSyncMessage extends PacketImpl {
|
|||
nodeID = ""; // this value will be ignored
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates a message of type {@link SyncDataType#ActivationSequence} whose only id is the
 * given node data version.
 * <p>
 * Everything else is initialised by the {@code (String nodeID)} constructor this one
 * delegates to.
 *
 * @param nodeID          the node id, forwarded to the delegating constructor
 * @param nodeDataVersion the value stored as the single element of {@code ids}
 */
public ReplicationStartSyncMessage(String nodeID, long nodeDataVersion) {
   this(nodeID);
   ids = new long[] {nodeDataVersion};
   dataType = SyncDataType.ActivationSequence;
}
|
||||
|
||||
public ReplicationStartSyncMessage(String nodeID) {
|
||||
this();
|
||||
synchronizationIsFinished = true;
|
||||
|
@ -118,10 +130,6 @@ public class ReplicationStartSyncMessage extends PacketImpl {
|
|||
DataConstants.SIZE_BOOLEAN + // buffer.writeBoolean(allowsAutoFailBack);
|
||||
nodeID.length() * 3; // buffer.writeString(nodeID); -- an estimate
|
||||
|
||||
|
||||
if (synchronizationIsFinished) {
|
||||
return size;
|
||||
}
|
||||
size += DataConstants.SIZE_BYTE + // buffer.writeByte(dataType.code);
|
||||
DataConstants.SIZE_INT + // buffer.writeInt(ids.length);
|
||||
DataConstants.SIZE_LONG * ids.length; // the write loop
|
||||
|
@ -135,8 +143,6 @@ public class ReplicationStartSyncMessage extends PacketImpl {
|
|||
buffer.writeBoolean(synchronizationIsFinished);
|
||||
buffer.writeBoolean(allowsAutoFailBack);
|
||||
buffer.writeString(nodeID);
|
||||
if (synchronizationIsFinished)
|
||||
return;
|
||||
buffer.writeByte(dataType.code);
|
||||
buffer.writeInt(ids.length);
|
||||
for (long id : ids) {
|
||||
|
@ -149,9 +155,6 @@ public class ReplicationStartSyncMessage extends PacketImpl {
|
|||
synchronizationIsFinished = buffer.readBoolean();
|
||||
allowsAutoFailBack = buffer.readBoolean();
|
||||
nodeID = buffer.readString();
|
||||
if (synchronizationIsFinished) {
|
||||
return;
|
||||
}
|
||||
dataType = SyncDataType.getDataType(buffer.readByte());
|
||||
int length = buffer.readInt();
|
||||
ids = new long[length];
|
||||
|
|
|
@ -37,7 +37,6 @@ import org.apache.activemq.artemis.api.core.Interceptor;
|
|||
import org.apache.activemq.artemis.api.core.Message;
|
||||
import org.apache.activemq.artemis.api.core.SimpleString;
|
||||
import org.apache.activemq.artemis.core.config.Configuration;
|
||||
import org.apache.activemq.artemis.core.io.IOCriticalErrorListener;
|
||||
import org.apache.activemq.artemis.core.io.SequentialFile;
|
||||
import org.apache.activemq.artemis.core.journal.EncoderPersister;
|
||||
import org.apache.activemq.artemis.core.journal.Journal;
|
||||
|
@ -82,9 +81,8 @@ import org.apache.activemq.artemis.core.replication.ReplicationManager.ADD_OPERA
|
|||
import org.apache.activemq.artemis.core.server.ActiveMQComponent;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
|
||||
import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum;
|
||||
|
||||
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
|
||||
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
|
||||
import org.apache.activemq.artemis.utils.actors.OrderedExecutorFactory;
|
||||
import org.jboss.logging.Logger;
|
||||
|
||||
|
@ -94,12 +92,20 @@ import org.jboss.logging.Logger;
|
|||
*/
|
||||
public final class ReplicationEndpoint implements ChannelHandler, ActiveMQComponent {
|
||||
|
||||
public interface ReplicationEndpointEventListener {
|
||||
|
||||
void onRemoteBackupUpToDate();
|
||||
|
||||
void onLiveStopping(ReplicationLiveIsStoppingMessage.LiveStopping message) throws ActiveMQException;
|
||||
|
||||
void onLiveNodeId(String nodeId);
|
||||
}
|
||||
|
||||
private static final Logger logger = Logger.getLogger(ReplicationEndpoint.class);
|
||||
|
||||
private final IOCriticalErrorListener criticalErrorListener;
|
||||
private final ActiveMQServerImpl server;
|
||||
private final boolean wantedFailBack;
|
||||
private final SharedNothingBackupActivation activation;
|
||||
private final ReplicationEndpointEventListener eventListener;
|
||||
private final boolean noSync = false;
|
||||
private Channel channel;
|
||||
private boolean supportResponseBatching;
|
||||
|
@ -129,8 +135,6 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
|
|||
private boolean deletePages = true;
|
||||
private volatile boolean started;
|
||||
|
||||
private SharedNothingBackupQuorum backupQuorum;
|
||||
|
||||
private Executor executor;
|
||||
|
||||
private List<Interceptor> outgoingInterceptors = null;
|
||||
|
@ -140,13 +144,11 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
|
|||
|
||||
// Constructors --------------------------------------------------
|
||||
public ReplicationEndpoint(final ActiveMQServerImpl server,
|
||||
IOCriticalErrorListener criticalErrorListener,
|
||||
boolean wantedFailBack,
|
||||
SharedNothingBackupActivation activation) {
|
||||
ReplicationEndpointEventListener eventListener) {
|
||||
this.server = server;
|
||||
this.criticalErrorListener = criticalErrorListener;
|
||||
this.wantedFailBack = wantedFailBack;
|
||||
this.activation = activation;
|
||||
this.eventListener = eventListener;
|
||||
this.pendingPackets = new ArrayDeque<>();
|
||||
this.supportResponseBatching = false;
|
||||
}
|
||||
|
@ -287,7 +289,7 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
|
|||
* @throws ActiveMQException
|
||||
*/
|
||||
private void handleLiveStopping(ReplicationLiveIsStoppingMessage packet) throws ActiveMQException {
|
||||
activation.remoteFailOver(packet.isFinalMessage());
|
||||
eventListener.onLiveStopping(packet.isFinalMessage());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -474,14 +476,14 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
|
|||
}
|
||||
|
||||
journalsHolder = null;
|
||||
backupQuorum.liveIDSet(liveID);
|
||||
activation.setRemoteBackupUpToDate();
|
||||
eventListener.onLiveNodeId(liveID);
|
||||
eventListener.onRemoteBackupUpToDate();
|
||||
|
||||
if (logger.isTraceEnabled()) {
|
||||
logger.trace("Backup is synchronized / BACKUP-SYNC-DONE");
|
||||
}
|
||||
|
||||
ActiveMQServerLogger.LOGGER.backupServerSynched(server);
|
||||
ActiveMQServerLogger.LOGGER.backupServerSynchronized(server, liveID);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -558,6 +560,11 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
|
|||
return replicationResponseMessage;
|
||||
|
||||
if (packet.isSynchronizationFinished()) {
|
||||
if (packet.getFileIds() != null && packet.getFileIds().length == 1) {
|
||||
// this is the version sequence of the data we are replicating
|
||||
// verified if we activate with this data
|
||||
server.getNodeManager().writeNodeActivationSequence(packet.getFileIds()[0]);
|
||||
}
|
||||
finishSynchronization(packet.getNodeID());
|
||||
replicationResponseMessage.setSynchronizationIsFinishedAcknowledgement(true);
|
||||
return replicationResponseMessage;
|
||||
|
@ -597,7 +604,7 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
|
|||
if (packet.getNodeID() != null) {
|
||||
// At the start of replication, we still do not know which is the nodeID that the live uses.
|
||||
// This is the point where the backup gets this information.
|
||||
backupQuorum.liveIDSet(packet.getNodeID());
|
||||
eventListener.onLiveNodeId(packet.getNodeID());
|
||||
}
|
||||
|
||||
break;
|
||||
|
@ -900,16 +907,6 @@ public final class ReplicationEndpoint implements ChannelHandler, ActiveMQCompon
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the quorumManager used by the server in the replicationEndpoint. It is used to inform the
|
||||
* backup server of the live's nodeID.
|
||||
*
|
||||
* @param backupQuorum
|
||||
*/
|
||||
public void setBackupQuorum(SharedNothingBackupQuorum backupQuorum) {
|
||||
this.backupQuorum = backupQuorum;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param executor2
|
||||
*/
|
||||
|
|
|
@ -821,7 +821,7 @@ public final class ReplicationManager implements ActiveMQComponent {
|
|||
}
|
||||
|
||||
synchronizationIsFinishedAcknowledgement.countUp();
|
||||
sendReplicatePacket(new ReplicationStartSyncMessage(nodeID));
|
||||
sendReplicatePacket(new ReplicationStartSyncMessage(nodeID, server.getNodeManager().getNodeActivationSequence()));
|
||||
try {
|
||||
if (!synchronizationIsFinishedAcknowledgement.await(initialReplicationSyncTimeout)) {
|
||||
ActiveMQReplicationTimeooutException exception = ActiveMQMessageBundle.BUNDLE.replicationSynchronizationTimeout(initialReplicationSyncTimeout);
|
||||
|
|
|
@ -40,7 +40,6 @@ import org.apache.activemq.artemis.core.persistence.OperationContext;
|
|||
import org.apache.activemq.artemis.core.persistence.StorageManager;
|
||||
import org.apache.activemq.artemis.core.postoffice.PostOffice;
|
||||
import org.apache.activemq.artemis.core.remoting.server.RemotingService;
|
||||
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
|
||||
import org.apache.activemq.artemis.core.replication.ReplicationManager;
|
||||
import org.apache.activemq.artemis.core.security.Role;
|
||||
import org.apache.activemq.artemis.core.security.SecurityAuth;
|
||||
|
@ -166,11 +165,6 @@ public interface ActiveMQServer extends ServiceComponent {
|
|||
|
||||
CriticalAnalyzer getCriticalAnalyzer();
|
||||
|
||||
/**
|
||||
* @return
|
||||
*/
|
||||
ReplicationEndpoint getReplicationEndpoint();
|
||||
|
||||
/**
|
||||
* It releases the lock held for the activation.
|
||||
*/
|
||||
|
|
|
@ -190,8 +190,8 @@ public interface ActiveMQServerLogger extends BasicLogger {
|
|||
void errorStoppingConnectorService(@Cause Throwable e, String name);
|
||||
|
||||
@LogMessage(level = Logger.Level.INFO)
|
||||
@Message(id = 221024, value = "Backup server {0} is synchronized with live-server.", format = Message.Format.MESSAGE_FORMAT)
|
||||
void backupServerSynched(ActiveMQServerImpl server);
|
||||
@Message(id = 221024, value = "Backup server {0} is synchronized with live server, nodeID={1}.", format = Message.Format.MESSAGE_FORMAT)
|
||||
void backupServerSynchronized(ActiveMQServerImpl server, String liveID);
|
||||
|
||||
@LogMessage(level = Logger.Level.INFO)
|
||||
@Message(id = 221025, value = "Replication: sending {0} (size={1}) to replica.", format = Message.Format.MESSAGE_FORMAT)
|
||||
|
|
|
@ -21,7 +21,6 @@ import org.apache.activemq.artemis.api.core.Pair;
|
|||
import org.apache.activemq.artemis.api.core.TransportConfiguration;
|
||||
import org.apache.activemq.artemis.api.core.client.ClusterTopologyListener;
|
||||
import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal;
|
||||
import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum;
|
||||
|
||||
/**
|
||||
* A class that will locate a particular live server running in a cluster. How this live is chosen
|
||||
|
@ -31,16 +30,23 @@ import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBacku
|
|||
*/
|
||||
public abstract class LiveNodeLocator implements ClusterTopologyListener {
|
||||
|
||||
private SharedNothingBackupQuorum backupQuorum;
|
||||
@FunctionalInterface
|
||||
public interface BackupRegistrationListener {
|
||||
|
||||
public LiveNodeLocator(SharedNothingBackupQuorum backupQuorum) {
|
||||
this.backupQuorum = backupQuorum;
|
||||
void onBackupRegistrationFailed(boolean alreadyReplicating);
|
||||
}
|
||||
|
||||
private final BackupRegistrationListener backupRegistrationListener;
|
||||
|
||||
public LiveNodeLocator(BackupRegistrationListener backupRegistrationListener) {
|
||||
this.backupRegistrationListener = backupRegistrationListener;
|
||||
}
|
||||
|
||||
/**
|
||||
* Use this constructor when the LiveNodeLocator is used for scaling down rather than replicating
|
||||
*/
|
||||
public LiveNodeLocator() {
|
||||
this(null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -67,12 +73,8 @@ public abstract class LiveNodeLocator implements ClusterTopologyListener {
|
|||
* tells the locator that the current connector has failed.
|
||||
*/
|
||||
public void notifyRegistrationFailed(boolean alreadyReplicating) {
|
||||
if (backupQuorum != null) {
|
||||
if (alreadyReplicating) {
|
||||
backupQuorum.notifyAlreadyReplicating();
|
||||
} else {
|
||||
backupQuorum.notifyRegistrationFailed();
|
||||
}
|
||||
if (backupRegistrationListener != null) {
|
||||
backupRegistrationListener.onBackupRegistrationFailed(alreadyReplicating);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -39,6 +39,7 @@ public abstract class NodeManager implements ActiveMQComponent {
|
|||
private UUID uuid;
|
||||
private boolean isStarted = false;
|
||||
private final Set<FileLockNodeManager.LockListener> lockListeners;
|
||||
protected long nodeActivationSequence; // local version of a coordinated sequence, tracking state transitions of ownership
|
||||
|
||||
public NodeManager(final boolean replicatedBackup) {
|
||||
this.replicatedBackup = replicatedBackup;
|
||||
|
@ -79,8 +80,30 @@ public abstract class NodeManager implements ActiveMQComponent {
|
|||
}
|
||||
}
|
||||
|
||||
public long readNodeActivationSequence() throws NodeManagerException {
|
||||
// TODO make it abstract
|
||||
throw new UnsupportedOperationException("TODO");
|
||||
}
|
||||
|
||||
public void writeNodeActivationSequence(long version) throws NodeManagerException {
|
||||
// TODO make it abstract
|
||||
throw new UnsupportedOperationException("TODO");
|
||||
}
|
||||
|
||||
public abstract SimpleString readNodeId() throws NodeManagerException;
|
||||
|
||||
public long getNodeActivationSequence() {
|
||||
synchronized (nodeIDGuard) {
|
||||
return nodeActivationSequence;
|
||||
}
|
||||
}
|
||||
|
||||
public void setNodeActivationSequence(long activationSequence) {
|
||||
synchronized (nodeIDGuard) {
|
||||
nodeActivationSequence = activationSequence;
|
||||
}
|
||||
}
|
||||
|
||||
public UUID getUUID() {
|
||||
synchronized (nodeIDGuard) {
|
||||
return uuid;
|
||||
|
|
|
@ -80,10 +80,16 @@ public class ClusterController implements ActiveMQComponent {
|
|||
private boolean started;
|
||||
private SimpleString replicatedClusterName;
|
||||
|
||||
public ClusterController(ActiveMQServer server, ScheduledExecutorService scheduledExecutor) {
|
||||
public ClusterController(ActiveMQServer server,
|
||||
ScheduledExecutorService scheduledExecutor,
|
||||
boolean useQuorumManager) {
|
||||
this.server = server;
|
||||
executor = server.getExecutorFactory().getExecutor();
|
||||
quorumManager = new QuorumManager(scheduledExecutor, this);
|
||||
quorumManager = useQuorumManager ? new QuorumManager(scheduledExecutor, this) : null;
|
||||
}
|
||||
|
||||
public ClusterController(ActiveMQServer server, ScheduledExecutorService scheduledExecutor) {
|
||||
this(server, scheduledExecutor, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -108,11 +114,11 @@ public class ClusterController implements ActiveMQComponent {
|
|||
//latch so we know once we are connected
|
||||
replicationClusterConnectedLatch = new CountDownLatch(1);
|
||||
//and add the quorum manager as a topology listener
|
||||
if (defaultLocator != null) {
|
||||
defaultLocator.addClusterTopologyListener(quorumManager);
|
||||
}
|
||||
|
||||
if (quorumManager != null) {
|
||||
if (defaultLocator != null) {
|
||||
defaultLocator.addClusterTopologyListener(quorumManager);
|
||||
}
|
||||
|
||||
//start the quorum manager
|
||||
quorumManager.start();
|
||||
}
|
||||
|
@ -126,6 +132,26 @@ public class ClusterController implements ActiveMQComponent {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* It adds {@code clusterTopologyListener} to {@code defaultLocator}.
|
||||
*/
|
||||
public void addClusterTopologyListener(ClusterTopologyListener clusterTopologyListener) {
|
||||
if (!this.started || defaultLocator == null) {
|
||||
throw new IllegalStateException("the controller must be started and with a locator initialized");
|
||||
}
|
||||
this.defaultLocator.addClusterTopologyListener(clusterTopologyListener);
|
||||
}
|
||||
|
||||
/**
|
||||
* It remove {@code clusterTopologyListener} from {@code defaultLocator}.
|
||||
*/
|
||||
public void removeClusterTopologyListener(ClusterTopologyListener clusterTopologyListener) {
|
||||
if (!this.started || defaultLocator == null) {
|
||||
throw new IllegalStateException("the controller must be started and with a locator initialized");
|
||||
}
|
||||
this.defaultLocator.removeClusterTopologyListener(clusterTopologyListener);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stop() throws Exception {
|
||||
if (logger.isDebugEnabled()) {
|
||||
|
@ -138,7 +164,9 @@ public class ClusterController implements ActiveMQComponent {
|
|||
serverLocatorInternal.close();
|
||||
}
|
||||
//stop the quorum manager
|
||||
quorumManager.stop();
|
||||
if (quorumManager != null) {
|
||||
quorumManager.stop();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -223,6 +251,17 @@ public class ClusterController implements ActiveMQComponent {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* add a cluster listener
|
||||
*
|
||||
* @param listener
|
||||
*/
|
||||
public void removeClusterTopologyListenerForReplication(ClusterTopologyListener listener) {
|
||||
if (replicationLocator != null) {
|
||||
replicationLocator.removeClusterTopologyListener(listener);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* add an interceptor
|
||||
*
|
||||
|
@ -232,6 +271,15 @@ public class ClusterController implements ActiveMQComponent {
|
|||
replicationLocator.addIncomingInterceptor(interceptor);
|
||||
}
|
||||
|
||||
/**
|
||||
* remove an interceptor
|
||||
*
|
||||
* @param interceptor
|
||||
*/
|
||||
public void removeIncomingInterceptorForReplication(Interceptor interceptor) {
|
||||
replicationLocator.removeIncomingInterceptor(interceptor);
|
||||
}
|
||||
|
||||
/**
|
||||
* connect to a specific node in the cluster used for replication
|
||||
*
|
||||
|
@ -406,7 +454,11 @@ public class ClusterController implements ActiveMQComponent {
|
|||
logger.debug("there is no acceptor used configured at the CoreProtocolManager " + this);
|
||||
}
|
||||
} else if (packet.getType() == PacketImpl.QUORUM_VOTE) {
|
||||
quorumManager.handleQuorumVote(clusterChannel, packet);
|
||||
if (quorumManager != null) {
|
||||
quorumManager.handleQuorumVote(clusterChannel, packet);
|
||||
} else {
|
||||
logger.warnf("Received %s on a cluster connection that's using the new quorum vote algorithm.", packet);
|
||||
}
|
||||
} else if (packet.getType() == PacketImpl.SCALEDOWN_ANNOUNCEMENT) {
|
||||
ScaleDownAnnounceMessage message = (ScaleDownAnnounceMessage) packet;
|
||||
//we don't really need to check as it should always be true
|
||||
|
|
|
@ -157,7 +157,7 @@ public class ClusterManager implements ActiveMQComponent {
|
|||
final ManagementService managementService,
|
||||
final Configuration configuration,
|
||||
final NodeManager nodeManager,
|
||||
final boolean backup) {
|
||||
final boolean useQuorumManager) {
|
||||
this.executorFactory = executorFactory;
|
||||
|
||||
executor = executorFactory.getExecutor();
|
||||
|
@ -174,7 +174,7 @@ public class ClusterManager implements ActiveMQComponent {
|
|||
|
||||
this.nodeManager = nodeManager;
|
||||
|
||||
clusterController = new ClusterController(server, scheduledExecutor);
|
||||
clusterController = new ClusterController(server, scheduledExecutor, useQuorumManager);
|
||||
|
||||
haManager = server.getActivation().getHAManager();
|
||||
}
|
||||
|
|
|
@ -57,4 +57,8 @@ public interface HAPolicy<T extends Activation> {
|
|||
|
||||
String getScaleDownClustername();
|
||||
|
||||
default boolean useQuorumManager() {
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,157 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.core.server.cluster.ha;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
|
||||
import org.apache.activemq.artemis.core.io.IOCriticalErrorListener;
|
||||
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
|
||||
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
|
||||
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
|
||||
|
||||
public class ReplicationBackupPolicy implements HAPolicy<ReplicationBackupActivation> {
|
||||
|
||||
private final ReplicationPrimaryPolicy livePolicy;
|
||||
private final String groupName;
|
||||
private final String clusterName;
|
||||
private final int maxSavedReplicatedJournalsSize;
|
||||
private final long retryReplicationWait;
|
||||
private final DistributedPrimitiveManagerConfiguration managerConfiguration;
|
||||
private final boolean tryFailback;
|
||||
|
||||
private ReplicationBackupPolicy(ReplicationBackupPolicyConfiguration configuration,
|
||||
ReplicationPrimaryPolicy livePolicy) {
|
||||
Objects.requireNonNull(livePolicy);
|
||||
this.clusterName = configuration.getClusterName();
|
||||
this.maxSavedReplicatedJournalsSize = configuration.getMaxSavedReplicatedJournalsSize();
|
||||
this.groupName = configuration.getGroupName();
|
||||
this.retryReplicationWait = configuration.getRetryReplicationWait();
|
||||
this.managerConfiguration = configuration.getDistributedManagerConfiguration();
|
||||
this.tryFailback = true;
|
||||
this.livePolicy = livePolicy;
|
||||
}
|
||||
|
||||
private ReplicationBackupPolicy(ReplicationBackupPolicyConfiguration configuration) {
|
||||
this.clusterName = configuration.getClusterName();
|
||||
this.maxSavedReplicatedJournalsSize = configuration.getMaxSavedReplicatedJournalsSize();
|
||||
this.groupName = configuration.getGroupName();
|
||||
this.retryReplicationWait = configuration.getRetryReplicationWait();
|
||||
this.managerConfiguration = configuration.getDistributedManagerConfiguration();
|
||||
this.tryFailback = false;
|
||||
livePolicy = ReplicationPrimaryPolicy.failoverPolicy(
|
||||
configuration.getInitialReplicationSyncTimeout(),
|
||||
configuration.getGroupName(),
|
||||
configuration.getClusterName(),
|
||||
this,
|
||||
configuration.isAllowFailBack(),
|
||||
configuration.getDistributedManagerConfiguration());
|
||||
}
|
||||
|
||||
public boolean isTryFailback() {
|
||||
return tryFailback;
|
||||
}
|
||||
|
||||
/**
|
||||
* It creates a policy which live policy won't cause to broker to try failback.
|
||||
*/
|
||||
public static ReplicationBackupPolicy with(ReplicationBackupPolicyConfiguration configuration) {
|
||||
return new ReplicationBackupPolicy(configuration);
|
||||
}
|
||||
|
||||
/**
|
||||
* It creates a companion backup policy for a natural-born primary: it would cause the broker to try failback.
|
||||
*/
|
||||
static ReplicationBackupPolicy failback(long retryReplicationWait,
|
||||
String clusterName,
|
||||
String groupName,
|
||||
ReplicationPrimaryPolicy livePolicy,
|
||||
DistributedPrimitiveManagerConfiguration distributedManagerConfiguration) {
|
||||
return new ReplicationBackupPolicy(ReplicationBackupPolicyConfiguration.withDefault()
|
||||
.setRetryReplicationWait(retryReplicationWait)
|
||||
.setClusterName(clusterName)
|
||||
.setGroupName(groupName)
|
||||
.setDistributedManagerConfiguration(distributedManagerConfiguration),
|
||||
livePolicy);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReplicationBackupActivation createActivation(ActiveMQServerImpl server,
|
||||
boolean wasLive,
|
||||
Map<String, Object> activationParams,
|
||||
IOCriticalErrorListener shutdownOnCriticalIO) throws Exception {
|
||||
return new ReplicationBackupActivation(server, DistributedPrimitiveManager.newInstanceOf(
|
||||
managerConfiguration.getClassName(), managerConfiguration.getProperties()), this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isSharedStore() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isBackup() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canScaleDown() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getScaleDownGroupName() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getScaleDownClustername() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getClusterName() {
|
||||
return clusterName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getBackupGroupName() {
|
||||
return groupName;
|
||||
}
|
||||
|
||||
public String getGroupName() {
|
||||
return groupName;
|
||||
}
|
||||
|
||||
public ReplicationPrimaryPolicy getLivePolicy() {
|
||||
return livePolicy;
|
||||
}
|
||||
|
||||
public int getMaxSavedReplicatedJournalsSize() {
|
||||
return maxSavedReplicatedJournalsSize;
|
||||
}
|
||||
|
||||
public long getRetryReplicationWait() {
|
||||
return retryReplicationWait;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean useQuorumManager() {
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.core.server.cluster.ha;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
|
||||
import org.apache.activemq.artemis.core.io.IOCriticalErrorListener;
|
||||
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
|
||||
import org.apache.activemq.artemis.core.server.impl.ReplicationPrimaryActivation;
|
||||
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
|
||||
|
||||
public class ReplicationPrimaryPolicy implements HAPolicy<ReplicationPrimaryActivation> {
|
||||
|
||||
private final ReplicationBackupPolicy backupPolicy;
|
||||
private final String clusterName;
|
||||
private final String groupName;
|
||||
private final long initialReplicationSyncTimeout;
|
||||
private final DistributedPrimitiveManagerConfiguration distributedManagerConfiguration;
|
||||
private final boolean allowAutoFailBack;
|
||||
private final String coordinationId;
|
||||
|
||||
private ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration configuration,
|
||||
ReplicationBackupPolicy backupPolicy,
|
||||
boolean allowAutoFailBack) {
|
||||
Objects.requireNonNull(backupPolicy);
|
||||
clusterName = configuration.getClusterName();
|
||||
groupName = configuration.getGroupName();
|
||||
initialReplicationSyncTimeout = configuration.getInitialReplicationSyncTimeout();
|
||||
distributedManagerConfiguration = configuration.getDistributedManagerConfiguration();
|
||||
coordinationId = configuration.getCoordinationId();
|
||||
this.allowAutoFailBack = allowAutoFailBack;
|
||||
this.backupPolicy = backupPolicy;
|
||||
}
|
||||
|
||||
private ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration config) {
|
||||
clusterName = config.getClusterName();
|
||||
groupName = config.getGroupName();
|
||||
coordinationId = config.getCoordinationId();
|
||||
initialReplicationSyncTimeout = config.getInitialReplicationSyncTimeout();
|
||||
distributedManagerConfiguration = config.getDistributedManagerConfiguration();
|
||||
this.allowAutoFailBack = false;
|
||||
backupPolicy = ReplicationBackupPolicy.failback(config.getRetryReplicationWait(), config.getClusterName(),
|
||||
config.getGroupName(), this,
|
||||
config.getDistributedManagerConfiguration());
|
||||
}
|
||||
|
||||
/**
|
||||
* It creates a companion failing-over primary policy for a natural-born backup: it's allowed to allow auto fail-back
|
||||
* only if configured to do it.
|
||||
*/
|
||||
static ReplicationPrimaryPolicy failoverPolicy(long initialReplicationSyncTimeout,
|
||||
String groupName,
|
||||
String clusterName,
|
||||
ReplicationBackupPolicy replicaPolicy,
|
||||
boolean allowAutoFailback,
|
||||
DistributedPrimitiveManagerConfiguration distributedManagerConfiguration) {
|
||||
return new ReplicationPrimaryPolicy(ReplicationPrimaryPolicyConfiguration.withDefault()
|
||||
.setInitialReplicationSyncTimeout(initialReplicationSyncTimeout)
|
||||
.setGroupName(groupName)
|
||||
.setClusterName(clusterName)
|
||||
.setDistributedManagerConfiguration(distributedManagerConfiguration),
|
||||
replicaPolicy, allowAutoFailback);
|
||||
}
|
||||
|
||||
/**
|
||||
* It creates a primary policy that never allow auto fail-back.<br>
|
||||
* It's meant to be used for natural-born primary brokers: its backup policy is set to always try to fail-back.
|
||||
*/
|
||||
public static ReplicationPrimaryPolicy with(ReplicationPrimaryPolicyConfiguration configuration) {
|
||||
return new ReplicationPrimaryPolicy(configuration);
|
||||
}
|
||||
|
||||
public ReplicationBackupPolicy getBackupPolicy() {
|
||||
return backupPolicy;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReplicationPrimaryActivation createActivation(ActiveMQServerImpl server,
|
||||
boolean wasLive,
|
||||
Map<String, Object> activationParams,
|
||||
IOCriticalErrorListener shutdownOnCriticalIO) throws Exception {
|
||||
return new ReplicationPrimaryActivation(server,
|
||||
DistributedPrimitiveManager.newInstanceOf(
|
||||
distributedManagerConfiguration.getClassName(),
|
||||
distributedManagerConfiguration.getProperties()), this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isSharedStore() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isBackup() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isWaitForActivation() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canScaleDown() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getBackupGroupName() {
|
||||
return groupName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getScaleDownGroupName() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getScaleDownClustername() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public boolean isAllowAutoFailBack() {
|
||||
return allowAutoFailBack;
|
||||
}
|
||||
|
||||
public String getClusterName() {
|
||||
return clusterName;
|
||||
}
|
||||
|
||||
public long getInitialReplicationSyncTimeout() {
|
||||
return initialReplicationSyncTimeout;
|
||||
}
|
||||
|
||||
public String getGroupName() {
|
||||
return groupName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean useQuorumManager() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public String getCoordinationId() {
|
||||
return coordinationId;
|
||||
}
|
||||
}
|
|
@ -28,11 +28,12 @@ import org.apache.activemq.artemis.core.client.impl.Topology;
|
|||
import org.apache.activemq.artemis.core.protocol.core.CoreRemotingConnection;
|
||||
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
|
||||
import org.apache.activemq.artemis.core.server.LiveNodeLocator.BackupRegistrationListener;
|
||||
import org.apache.activemq.artemis.core.server.NetworkHealthCheck;
|
||||
import org.apache.activemq.artemis.core.server.NodeManager;
|
||||
import org.jboss.logging.Logger;
|
||||
|
||||
public class SharedNothingBackupQuorum implements Quorum, SessionFailureListener {
|
||||
public class SharedNothingBackupQuorum implements Quorum, SessionFailureListener, BackupRegistrationListener {
|
||||
|
||||
private static final Logger LOGGER = Logger.getLogger(SharedNothingBackupQuorum.class);
|
||||
|
||||
|
@ -236,13 +237,9 @@ public class SharedNothingBackupQuorum implements Quorum, SessionFailureListener
|
|||
}
|
||||
}
|
||||
|
||||
public void notifyRegistrationFailed() {
|
||||
signal = BACKUP_ACTIVATION.FAILURE_REPLICATING;
|
||||
latch.countDown();
|
||||
}
|
||||
|
||||
public void notifyAlreadyReplicating() {
|
||||
signal = BACKUP_ACTIVATION.ALREADY_REPLICATING;
|
||||
@Override
|
||||
public void onBackupRegistrationFailed(boolean alreadyReplicating) {
|
||||
signal = alreadyReplicating ? BACKUP_ACTIVATION.ALREADY_REPLICATING : BACKUP_ACTIVATION.FAILURE_REPLICATING;
|
||||
latch.countDown();
|
||||
}
|
||||
|
||||
|
|
|
@ -36,6 +36,7 @@ public class FileMoveManager {
|
|||
private static final Logger logger = Logger.getLogger(FileMoveManager.class);
|
||||
|
||||
private final File folder;
|
||||
private final String[] prefixesToPreserve;
|
||||
private int maxFolders;
|
||||
public static final String PREFIX = "oldreplica.";
|
||||
|
||||
|
@ -70,9 +71,10 @@ public class FileMoveManager {
|
|||
this(folder, -1);
|
||||
}
|
||||
|
||||
public FileMoveManager(File folder, int maxFolders) {
|
||||
public FileMoveManager(File folder, int maxFolders, String... prefixesToPreserve) {
|
||||
this.folder = folder;
|
||||
this.maxFolders = maxFolders;
|
||||
this.prefixesToPreserve = prefixesToPreserve != null ? Arrays.copyOf(prefixesToPreserve, prefixesToPreserve.length) : null;
|
||||
}
|
||||
|
||||
public int getMaxFolders() {
|
||||
|
@ -99,8 +101,23 @@ public class FileMoveManager {
|
|||
ActiveMQServerLogger.LOGGER.backupDeletingData(folder.getPath());
|
||||
for (String fileMove : files) {
|
||||
File fileFrom = new File(folder, fileMove);
|
||||
logger.tracef("deleting %s", fileFrom);
|
||||
deleteTree(fileFrom);
|
||||
if (prefixesToPreserve != null) {
|
||||
boolean skip = false;
|
||||
for (String prefixToPreserve : prefixesToPreserve) {
|
||||
if (fileMove.startsWith(prefixToPreserve)) {
|
||||
logger.tracef("skipping %s", fileFrom);
|
||||
skip = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!skip) {
|
||||
logger.tracef("deleting %s", fileFrom);
|
||||
deleteTree(fileFrom);
|
||||
}
|
||||
} else {
|
||||
logger.tracef("deleting %s", fileFrom);
|
||||
deleteTree(fileFrom);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Since we will create one folder, we are already taking that one into consideration
|
||||
|
@ -113,8 +130,26 @@ public class FileMoveManager {
|
|||
for (String fileMove : files) {
|
||||
File fileFrom = new File(folder, fileMove);
|
||||
File fileTo = new File(folderTo, fileMove);
|
||||
logger.tracef("doMove:: moving %s as %s", fileFrom, fileTo);
|
||||
Files.move(fileFrom.toPath(), fileTo.toPath());
|
||||
if (prefixesToPreserve != null) {
|
||||
boolean copy = false;
|
||||
for (String prefixToPreserve : prefixesToPreserve) {
|
||||
if (fileMove.startsWith(prefixToPreserve)) {
|
||||
logger.tracef("skipping %s", fileFrom);
|
||||
copy = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (copy) {
|
||||
logger.tracef("copying %s to %s", fileFrom, fileTo);
|
||||
Files.copy(fileFrom.toPath(), fileTo.toPath());
|
||||
} else {
|
||||
logger.tracef("doMove:: moving %s as %s", fileFrom, fileTo);
|
||||
Files.move(fileFrom.toPath(), fileTo.toPath());
|
||||
}
|
||||
} else {
|
||||
logger.tracef("doMove:: moving %s as %s", fileFrom, fileTo);
|
||||
Files.move(fileFrom.toPath(), fileTo.toPath());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -110,4 +110,8 @@ public abstract class Activation implements Runnable {
|
|||
public ReplicationManager getReplicationManager() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public boolean isReplicaSync() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -109,7 +109,6 @@ import org.apache.activemq.artemis.core.postoffice.impl.LocalQueueBinding;
|
|||
import org.apache.activemq.artemis.core.postoffice.impl.PostOfficeImpl;
|
||||
import org.apache.activemq.artemis.core.remoting.server.RemotingService;
|
||||
import org.apache.activemq.artemis.core.remoting.server.impl.RemotingServiceImpl;
|
||||
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
|
||||
import org.apache.activemq.artemis.core.replication.ReplicationManager;
|
||||
import org.apache.activemq.artemis.core.security.CheckType;
|
||||
import org.apache.activemq.artemis.core.security.Role;
|
||||
|
@ -660,7 +659,7 @@ public class ActiveMQServerImpl implements ActiveMQServer {
|
|||
afterActivationCreated.run();
|
||||
} catch (Throwable e) {
|
||||
logger.warn(e.getMessage(), e); // just debug, this is not supposed to happend, and if it does
|
||||
// it will be embedeed code from tests
|
||||
// it will be embedded code from tests
|
||||
}
|
||||
afterActivationCreated = null;
|
||||
}
|
||||
|
@ -797,14 +796,6 @@ public class ActiveMQServerImpl implements ActiveMQServer {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReplicationEndpoint getReplicationEndpoint() {
|
||||
if (activation instanceof SharedNothingBackupActivation) {
|
||||
return ((SharedNothingBackupActivation) activation).getReplicationEndpoint();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void unlockActivation() {
|
||||
activationLock.release();
|
||||
|
@ -921,7 +912,7 @@ public class ActiveMQServerImpl implements ActiveMQServer {
|
|||
return threadPool;
|
||||
}
|
||||
|
||||
public void setActivation(SharedNothingLiveActivation activation) {
|
||||
public void setActivation(Activation activation) {
|
||||
this.activation = activation;
|
||||
}
|
||||
|
||||
|
@ -1145,19 +1136,7 @@ public class ActiveMQServerImpl implements ActiveMQServer {
|
|||
|
||||
@Override
|
||||
public boolean isReplicaSync() {
|
||||
if (activation instanceof SharedNothingLiveActivation) {
|
||||
ReplicationManager replicationManager = getReplicationManager();
|
||||
|
||||
if (replicationManager == null) {
|
||||
return false;
|
||||
} else {
|
||||
return !replicationManager.isSynchronizing();
|
||||
}
|
||||
} else if (activation instanceof SharedNothingBackupActivation) {
|
||||
return ((SharedNothingBackupActivation) activation).isRemoteBackupUpToDate();
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
return activation.isReplicaSync();
|
||||
}
|
||||
|
||||
public void stop(boolean failoverOnServerShutdown, final boolean criticalIOError, boolean restarting) {
|
||||
|
@ -2898,6 +2877,8 @@ public class ActiveMQServerImpl implements ActiveMQServer {
|
|||
public String toString() {
|
||||
if (identity != null) {
|
||||
return "ActiveMQServerImpl::" + identity;
|
||||
} else if (configuration != null && configuration.getName() != null) {
|
||||
return "ActiveMQServerImpl::" + "name=" + configuration.getName();
|
||||
}
|
||||
return "ActiveMQServerImpl::" + (nodeManager != null ? "serverUUID=" + nodeManager.getUUID() : "");
|
||||
}
|
||||
|
@ -3116,7 +3097,7 @@ public class ActiveMQServerImpl implements ActiveMQServer {
|
|||
postOffice = new PostOfficeImpl(this, storageManager, pagingManager, queueFactory, managementService, configuration.getMessageExpiryScanPeriod(), configuration.getAddressQueueScanPeriod(), configuration.getWildcardConfiguration(), configuration.getIDCacheSize(), configuration.isPersistIDCache(), addressSettingsRepository);
|
||||
|
||||
// This can't be created until node id is set
|
||||
clusterManager = new ClusterManager(executorFactory, this, postOffice, scheduledPool, managementService, configuration, nodeManager, haPolicy.isBackup());
|
||||
clusterManager = new ClusterManager(executorFactory, this, postOffice, scheduledPool, managementService, configuration, nodeManager, haPolicy.useQuorumManager());
|
||||
|
||||
federationManager = new FederationManager(this);
|
||||
|
||||
|
@ -4191,10 +4172,16 @@ public class ActiveMQServerImpl implements ActiveMQServer {
|
|||
* move any older data away and log a warning about it.
|
||||
*/
|
||||
void moveServerData(int maxSavedReplicated) throws IOException {
|
||||
moveServerData(maxSavedReplicated, false);
|
||||
}
|
||||
|
||||
void moveServerData(int maxSavedReplicated, boolean preserveLockFiles) throws IOException {
|
||||
File[] dataDirs = new File[]{configuration.getBindingsLocation(), configuration.getJournalLocation(), configuration.getPagingLocation(), configuration.getLargeMessagesLocation()};
|
||||
|
||||
for (File data : dataDirs) {
|
||||
FileMoveManager moveManager = new FileMoveManager(data, maxSavedReplicated);
|
||||
final boolean isLockFolder = preserveLockFiles ? data.equals(configuration.getNodeManagerLockLocation()) : false;
|
||||
final String[] lockPrefixes = isLockFolder ? new String[]{FileBasedNodeManager.SERVER_LOCK_NAME, "serverlock"} : null;
|
||||
FileMoveManager moveManager = new FileMoveManager(data, maxSavedReplicated, lockPrefixes);
|
||||
moveManager.doMove();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,7 +29,6 @@ import org.apache.activemq.artemis.api.core.Pair;
|
|||
import org.apache.activemq.artemis.api.core.TransportConfiguration;
|
||||
import org.apache.activemq.artemis.api.core.client.TopologyMember;
|
||||
import org.apache.activemq.artemis.core.server.LiveNodeLocator;
|
||||
import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum;
|
||||
import org.apache.activemq.artemis.utils.ConcurrentUtil;
|
||||
|
||||
/**
|
||||
|
@ -47,8 +46,9 @@ public class AnyLiveNodeLocatorForReplication extends LiveNodeLocator {
|
|||
|
||||
private String nodeID;
|
||||
|
||||
public AnyLiveNodeLocatorForReplication(SharedNothingBackupQuorum backupQuorum, ActiveMQServerImpl server, long retryReplicationWait) {
|
||||
super(backupQuorum);
|
||||
public AnyLiveNodeLocatorForReplication(BackupRegistrationListener backupRegistrationListener,
|
||||
ActiveMQServerImpl server, long retryReplicationWait) {
|
||||
super(backupRegistrationListener);
|
||||
this.server = server;
|
||||
this.retryReplicationWait = retryReplicationWait;
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.channels.FileChannel;
|
||||
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
|
||||
|
@ -27,17 +28,68 @@ import org.apache.activemq.artemis.core.server.NodeManager;
|
|||
import org.apache.activemq.artemis.utils.UUID;
|
||||
import org.apache.activemq.artemis.utils.UUIDGenerator;
|
||||
|
||||
import static java.nio.file.StandardOpenOption.CREATE;
|
||||
import static java.nio.file.StandardOpenOption.READ;
|
||||
import static java.nio.file.StandardOpenOption.WRITE;
|
||||
|
||||
public abstract class FileBasedNodeManager extends NodeManager {
|
||||
|
||||
protected static final byte FIRST_TIME_START = '0';
|
||||
public static final String SERVER_LOCK_NAME = "server.lock";
|
||||
public static final String SERVER_ACTIVATION_SEQUENCE_NAME = "server.activation.sequence";
|
||||
private static final String ACCESS_MODE = "rw";
|
||||
private final File directory;
|
||||
protected FileChannel channel;
|
||||
protected FileChannel activationSequenceChannel;
|
||||
|
||||
/**
 * Creates a node manager whose files live under {@code directory}.
 *
 * @param replicatedBackup forwarded as-is to the {@code NodeManager} constructor
 * @param directory        folder used to resolve the node manager files; when not {@code null}
 *                         it is created (with any missing parent) if it does not exist yet
 */
public FileBasedNodeManager(boolean replicatedBackup, File directory) {
   super(replicatedBackup);
   this.directory = directory;
   if (directory == null) {
      return;
   }
   // the outcome of mkdirs is not checked here
   directory.mkdirs();
}
|
||||
|
||||
protected void useActivationSequenceChannel() throws IOException {
|
||||
if (activationSequenceChannel != null) {
|
||||
return;
|
||||
}
|
||||
activationSequenceChannel = FileChannel.open(newFile(SERVER_ACTIVATION_SEQUENCE_NAME).toPath(), READ, WRITE, CREATE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long readNodeActivationSequence() throws NodeManagerException {
|
||||
if (!isStarted()) {
|
||||
throw new NodeManagerException(new IllegalStateException("node manager must be started first"));
|
||||
}
|
||||
try {
|
||||
useActivationSequenceChannel();
|
||||
ByteBuffer tmpBuffer = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
|
||||
if (activationSequenceChannel.read(tmpBuffer, 0) != Long.BYTES) {
|
||||
return 0;
|
||||
}
|
||||
tmpBuffer.flip();
|
||||
return tmpBuffer.getLong(0);
|
||||
} catch (IOException ie) {
|
||||
throw new NodeManagerException(ie);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeNodeActivationSequence(long version) throws NodeManagerException {
|
||||
if (!isStarted()) {
|
||||
throw new NodeManagerException(new IllegalStateException("node manager must be started first"));
|
||||
}
|
||||
try {
|
||||
useActivationSequenceChannel();
|
||||
ByteBuffer tmpBuffer = ByteBuffer.allocate(Long.BYTES).order(ByteOrder.BIG_ENDIAN);
|
||||
tmpBuffer.putLong(0, version);
|
||||
activationSequenceChannel.write(tmpBuffer, 0);
|
||||
activationSequenceChannel.force(false);
|
||||
setNodeActivationSequence(version);
|
||||
} catch (IOException ie) {
|
||||
throw new NodeManagerException(ie);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -101,12 +153,8 @@ public abstract class FileBasedNodeManager extends NodeManager {
|
|||
createNodeId();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return
|
||||
*/
|
||||
protected final File newFile(final String fileName) {
|
||||
File file = new File(directory, fileName);
|
||||
return file;
|
||||
return new File(directory, fileName);
|
||||
}
|
||||
|
||||
protected final synchronized void createNodeId() throws IOException {
|
||||
|
@ -137,9 +185,20 @@ public abstract class FileBasedNodeManager extends NodeManager {
|
|||
@Override
|
||||
public synchronized void stop() throws Exception {
|
||||
FileChannel channelCopy = channel;
|
||||
if (channelCopy != null)
|
||||
channelCopy.close();
|
||||
super.stop();
|
||||
try {
|
||||
if (channelCopy != null)
|
||||
channelCopy.close();
|
||||
} finally {
|
||||
try {
|
||||
FileChannel dataVersionChannel = this.activationSequenceChannel;
|
||||
this.activationSequenceChannel = null;
|
||||
if (dataVersionChannel != null) {
|
||||
dataVersionChannel.close();
|
||||
}
|
||||
} finally {
|
||||
super.stop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,127 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.core.server.impl;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.locks.Condition;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
import org.apache.activemq.artemis.api.core.ActiveMQException;
|
||||
import org.apache.activemq.artemis.api.core.Pair;
|
||||
import org.apache.activemq.artemis.api.core.TransportConfiguration;
|
||||
import org.apache.activemq.artemis.api.core.client.TopologyMember;
|
||||
import org.apache.activemq.artemis.core.server.LiveNodeLocator;
|
||||
import org.apache.activemq.artemis.utils.ConcurrentUtil;
|
||||
|
||||
/**
|
||||
* It looks for a live server in the cluster with a specific NodeID
|
||||
*/
|
||||
public class NamedLiveNodeIdLocatorForReplication extends LiveNodeLocator {
|
||||
|
||||
private final Lock lock = new ReentrantLock();
|
||||
private final Condition condition = lock.newCondition();
|
||||
private final String nodeID;
|
||||
private final long retryReplicationWait;
|
||||
private final Queue<Pair<TransportConfiguration, TransportConfiguration>> liveConfigurations = new LinkedList<>();
|
||||
private final ArrayList<Pair<TransportConfiguration, TransportConfiguration>> triedConfigurations = new ArrayList<>();
|
||||
private boolean found;
|
||||
|
||||
public NamedLiveNodeIdLocatorForReplication(String nodeID,
|
||||
BackupRegistrationListener backupRegistrationListener,
|
||||
long retryReplicationWait) {
|
||||
super(backupRegistrationListener);
|
||||
this.nodeID = nodeID;
|
||||
this.retryReplicationWait = retryReplicationWait;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void locateNode() throws ActiveMQException {
|
||||
locateNode(-1L);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void locateNode(long timeout) throws ActiveMQException {
|
||||
try {
|
||||
lock.lock();
|
||||
if (liveConfigurations.size() == 0) {
|
||||
try {
|
||||
if (timeout != -1L) {
|
||||
ConcurrentUtil.await(condition, timeout);
|
||||
} else {
|
||||
while (liveConfigurations.size() == 0) {
|
||||
condition.await(retryReplicationWait, TimeUnit.MILLISECONDS);
|
||||
liveConfigurations.addAll(triedConfigurations);
|
||||
triedConfigurations.clear();
|
||||
}
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
//ignore
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
lock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void nodeUP(TopologyMember topologyMember, boolean last) {
|
||||
try {
|
||||
lock.lock();
|
||||
if (nodeID.equals(topologyMember.getNodeId()) && topologyMember.getLive() != null) {
|
||||
Pair<TransportConfiguration, TransportConfiguration> liveConfiguration = new Pair<>(topologyMember.getLive(), topologyMember.getBackup());
|
||||
if (!liveConfigurations.contains(liveConfiguration)) {
|
||||
liveConfigurations.add(liveConfiguration);
|
||||
}
|
||||
found = true;
|
||||
condition.signal();
|
||||
}
|
||||
} finally {
|
||||
lock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void nodeDown(long eventUID, String nodeID) {
|
||||
//no op
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getNodeID() {
|
||||
return found ? nodeID : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pair<TransportConfiguration, TransportConfiguration> getLiveConfiguration() {
|
||||
return liveConfigurations.peek();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void notifyRegistrationFailed(boolean alreadyReplicating) {
|
||||
try {
|
||||
lock.lock();
|
||||
triedConfigurations.add(liveConfigurations.poll());
|
||||
super.notifyRegistrationFailed(alreadyReplicating);
|
||||
} finally {
|
||||
lock.unlock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -29,7 +29,6 @@ import org.apache.activemq.artemis.api.core.Pair;
|
|||
import org.apache.activemq.artemis.api.core.TransportConfiguration;
|
||||
import org.apache.activemq.artemis.api.core.client.TopologyMember;
|
||||
import org.apache.activemq.artemis.core.server.LiveNodeLocator;
|
||||
import org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum;
|
||||
import org.apache.activemq.artemis.utils.ConcurrentUtil;
|
||||
|
||||
/**
|
||||
|
@ -48,8 +47,10 @@ public class NamedLiveNodeLocatorForReplication extends LiveNodeLocator {
|
|||
|
||||
private String nodeID;
|
||||
|
||||
public NamedLiveNodeLocatorForReplication(String backupGroupName, SharedNothingBackupQuorum quorumManager, long retryReplicationWait) {
|
||||
super(quorumManager);
|
||||
public NamedLiveNodeLocatorForReplication(String backupGroupName,
|
||||
BackupRegistrationListener backupRegistrationListener,
|
||||
long retryReplicationWait) {
|
||||
super(backupRegistrationListener);
|
||||
this.backupGroupName = backupGroupName;
|
||||
this.retryReplicationWait = retryReplicationWait;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,571 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.core.server.impl;
|
||||
|
||||
import javax.annotation.concurrent.GuardedBy;
|
||||
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import org.apache.activemq.artemis.api.core.ActiveMQException;
|
||||
import org.apache.activemq.artemis.api.core.ActiveMQIllegalStateException;
|
||||
import org.apache.activemq.artemis.api.core.Pair;
|
||||
import org.apache.activemq.artemis.api.core.SimpleString;
|
||||
import org.apache.activemq.artemis.api.core.TransportConfiguration;
|
||||
import org.apache.activemq.artemis.core.protocol.core.Channel;
|
||||
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServer;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
|
||||
import org.apache.activemq.artemis.core.server.LiveNodeLocator;
|
||||
import org.apache.activemq.artemis.core.server.NodeManager;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ClusterControl;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ClusterController;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
|
||||
import org.apache.activemq.artemis.quorum.DistributedLock;
|
||||
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
|
||||
import org.apache.activemq.artemis.quorum.UnavailableStateException;
|
||||
import org.jboss.logging.Logger;
|
||||
|
||||
import static org.apache.activemq.artemis.core.server.impl.ReplicationObserver.ReplicationFailure;
|
||||
import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.ensureSequentialAccessToNodeData;
|
||||
import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.tryActivate;
|
||||
|
||||
/**
|
||||
* This activation can be used by a primary while trying to fail-back ie {@code failback == true} or
|
||||
* by a natural-born backup ie {@code failback == false}.<br>
|
||||
*/
|
||||
public final class ReplicationBackupActivation extends Activation implements DistributedPrimitiveManager.UnavailableManagerListener {
|
||||
|
||||
private static final Logger LOGGER = Logger.getLogger(ReplicationBackupActivation.class);
|
||||
|
||||
private final ReplicationBackupPolicy policy;
|
||||
private final ActiveMQServerImpl activeMQServer;
|
||||
// This field is != null iff this node is a primary during a fail-back ie acting as a backup in order to become live again.
|
||||
private final String expectedNodeID;
|
||||
@GuardedBy("this")
|
||||
private boolean closed;
|
||||
private final DistributedPrimitiveManager distributedManager;
|
||||
// Used for monitoring purposes
|
||||
private volatile ReplicationObserver replicationObserver;
|
||||
// Used for testing purposes
|
||||
private volatile ReplicationEndpoint replicationEndpoint;
|
||||
// Used for testing purposes
|
||||
private Consumer<ReplicationEndpoint> onReplicationEndpointCreation;
|
||||
// Used to arbiter one-shot server stop/restart
|
||||
private final AtomicBoolean stopping;
|
||||
|
||||
/**
 * Creates the activation.
 * <p>
 * When {@code policy.isTryFailback()} is {@code true} the current NodeID of {@code activeMQServer}
 * is captured as the expected NodeID of the live to look for.
 *
 * @param activeMQServer     the server this activation belongs to
 * @param distributedManager the manager of the distributed primitives used by this activation
 * @param policy             the backup policy
 * @throws IllegalStateException if fail-back is requested but the server has no (or an empty) NodeID
 */
public ReplicationBackupActivation(final ActiveMQServerImpl activeMQServer,
                                   final DistributedPrimitiveManager distributedManager,
                                   final ReplicationBackupPolicy policy) {
   String failbackNodeID = null;
   if (policy.isTryFailback()) {
      final SimpleString currentNodeID = activeMQServer.getNodeID();
      if (currentNodeID == null || currentNodeID.isEmpty()) {
         throw new IllegalStateException("A failback activation must be biased around a specific NodeID");
      }
      failbackNodeID = currentNodeID.toString();
   }
   this.activeMQServer = activeMQServer;
   this.expectedNodeID = failbackNodeID;
   this.distributedManager = distributedManager;
   this.policy = policy;
   this.replicationObserver = null;
   this.replicationEndpoint = null;
   this.stopping = new AtomicBoolean(false);
}
|
||||
|
||||
/**
|
||||
* used for testing purposes.
|
||||
*/
|
||||
public DistributedPrimitiveManager getDistributedManager() {
|
||||
return distributedManager;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onUnavailableManagerEvent() {
|
||||
synchronized (this) {
|
||||
if (closed) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
LOGGER.info("Unavailable quorum service detected: try restart server");
|
||||
asyncRestartServer(activeMQServer, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* This util class exists because {@link LiveNodeLocator} need a {@link LiveNodeLocator.BackupRegistrationListener}
|
||||
* to forward backup registration failure events: this is used to switch on/off backup registration event listening
|
||||
* on an existing locator.
|
||||
*/
|
||||
private static final class RegistrationFailureForwarder implements LiveNodeLocator.BackupRegistrationListener, AutoCloseable {
|
||||
|
||||
private static final LiveNodeLocator.BackupRegistrationListener NOOP_LISTENER = ignore -> {
|
||||
};
|
||||
private volatile LiveNodeLocator.BackupRegistrationListener listener = NOOP_LISTENER;
|
||||
|
||||
public RegistrationFailureForwarder to(LiveNodeLocator.BackupRegistrationListener listener) {
|
||||
this.listener = listener;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onBackupRegistrationFailed(boolean alreadyReplicating) {
|
||||
listener.onBackupRegistrationFailed(alreadyReplicating);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
listener = NOOP_LISTENER;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
synchronized (this) {
|
||||
if (closed) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
try {
|
||||
distributedManager.start();
|
||||
final long nodeActivationSequence = activeMQServer.getNodeManager().readNodeActivationSequence();
|
||||
// only a backup with positive local activation sequence could contain valuable data
|
||||
if (nodeActivationSequence > 0) {
|
||||
final String nodeId = activeMQServer.getNodeManager().getNodeId().toString();
|
||||
DistributedLock liveLockWithInSyncReplica;
|
||||
while (true) {
|
||||
distributedManager.start();
|
||||
try {
|
||||
liveLockWithInSyncReplica = tryActivate(nodeId, nodeActivationSequence, distributedManager, LOGGER);
|
||||
break;
|
||||
} catch (UnavailableStateException canRecoverEx) {
|
||||
distributedManager.stop();
|
||||
}
|
||||
}
|
||||
if (liveLockWithInSyncReplica != null) {
|
||||
// retain state and start as live
|
||||
if (!activeMQServer.initialisePart1(false)) {
|
||||
return;
|
||||
}
|
||||
activeMQServer.setState(ActiveMQServerImpl.SERVER_STATE.STARTED);
|
||||
startAsLive(liveLockWithInSyncReplica);
|
||||
return;
|
||||
}
|
||||
}
|
||||
distributedManager.addUnavailableManagerListener(this);
|
||||
// Stop the previous node manager and create a new one with NodeManager::replicatedBackup == true:
|
||||
// NodeManager::start skip setup lock file with NodeID, until NodeManager::stopBackup is called.
|
||||
activeMQServer.resetNodeManager();
|
||||
// A primary need to preserve NodeID across runs
|
||||
activeMQServer.moveServerData(policy.getMaxSavedReplicatedJournalsSize(), policy.isTryFailback());
|
||||
activeMQServer.getNodeManager().start();
|
||||
if (!activeMQServer.initialisePart1(false)) {
|
||||
return;
|
||||
}
|
||||
synchronized (this) {
|
||||
if (closed)
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
final ClusterController clusterController = activeMQServer.getClusterManager().getClusterController();
|
||||
|
||||
LOGGER.infof("Apache ActiveMQ Artemis Backup Server version %s [%s] started, awaiting connection to a live cluster member to start replication", activeMQServer.getVersion().getFullVersion(),
|
||||
activeMQServer.toString());
|
||||
|
||||
clusterController.awaitConnectionToReplicationCluster();
|
||||
activeMQServer.getBackupManager().start();
|
||||
activeMQServer.setState(ActiveMQServerImpl.SERVER_STATE.STARTED);
|
||||
final DistributedLock liveLock = replicateAndFailover(clusterController);
|
||||
if (liveLock == null) {
|
||||
return;
|
||||
}
|
||||
startAsLive(liveLock);
|
||||
} catch (Exception e) {
|
||||
if ((e instanceof InterruptedException || e instanceof IllegalStateException) && !activeMQServer.isStarted()) {
|
||||
// do not log these errors if the server is being stopped.
|
||||
return;
|
||||
}
|
||||
ActiveMQServerLogger.LOGGER.initializationError(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void startAsLive(final DistributedLock liveLock) throws Exception {
|
||||
activeMQServer.setHAPolicy(policy.getLivePolicy());
|
||||
|
||||
synchronized (activeMQServer) {
|
||||
if (!activeMQServer.isStarted()) {
|
||||
liveLock.close();
|
||||
return;
|
||||
}
|
||||
try {
|
||||
ensureSequentialAccessToNodeData(activeMQServer, distributedManager, LOGGER);
|
||||
} catch (Throwable fatal) {
|
||||
LOGGER.warn(fatal);
|
||||
// policy is already live one, but there's no activation yet: we can just stop
|
||||
asyncRestartServer(activeMQServer, false, false);
|
||||
throw new ActiveMQIllegalStateException("This server cannot ensure sequential access to broker data: activation is failed");
|
||||
}
|
||||
ActiveMQServerLogger.LOGGER.becomingLive(activeMQServer);
|
||||
// stopBackup is going to write the NodeID previously set on the NodeManager,
|
||||
// because activeMQServer.resetNodeManager() has created a NodeManager with replicatedBackup == true.
|
||||
activeMQServer.getNodeManager().stopBackup();
|
||||
activeMQServer.getStorageManager().start();
|
||||
activeMQServer.getBackupManager().activated();
|
||||
// IMPORTANT:
|
||||
// we're setting this activation JUST because it would allow the server to use its
|
||||
// getActivationChannelHandler to handle replication
|
||||
final ReplicationPrimaryActivation primaryActivation = new ReplicationPrimaryActivation(activeMQServer, distributedManager, policy.getLivePolicy());
|
||||
liveLock.addListener(primaryActivation);
|
||||
activeMQServer.setActivation(primaryActivation);
|
||||
activeMQServer.initialisePart2(false);
|
||||
// calling primaryActivation.stateChanged !isHelByCaller is necessary in case the lock was unavailable
|
||||
// before liveLock.addListener: just throwing an exception won't stop the broker.
|
||||
final boolean stillLive;
|
||||
try {
|
||||
stillLive = liveLock.isHeldByCaller();
|
||||
} catch (UnavailableStateException e) {
|
||||
LOGGER.warn(e);
|
||||
primaryActivation.onUnavailableLockEvent();
|
||||
throw new ActiveMQIllegalStateException("This server cannot check its role as a live: activation is failed");
|
||||
}
|
||||
if (!stillLive) {
|
||||
primaryActivation.onUnavailableLockEvent();
|
||||
throw new ActiveMQIllegalStateException("This server is not live anymore: activation is failed");
|
||||
}
|
||||
if (activeMQServer.getIdentity() != null) {
|
||||
ActiveMQServerLogger.LOGGER.serverIsLive(activeMQServer.getIdentity());
|
||||
} else {
|
||||
ActiveMQServerLogger.LOGGER.serverIsLive();
|
||||
}
|
||||
activeMQServer.completeActivation(true);
|
||||
}
|
||||
}
|
||||
|
||||
private LiveNodeLocator createLiveNodeLocator(final LiveNodeLocator.BackupRegistrationListener registrationListener) {
|
||||
if (expectedNodeID != null) {
|
||||
assert policy.isTryFailback();
|
||||
return new NamedLiveNodeIdLocatorForReplication(expectedNodeID, registrationListener, policy.getRetryReplicationWait());
|
||||
}
|
||||
return policy.getGroupName() == null ?
|
||||
new AnyLiveNodeLocatorForReplication(registrationListener, activeMQServer, policy.getRetryReplicationWait()) :
|
||||
new NamedLiveNodeLocatorForReplication(policy.getGroupName(), registrationListener, policy.getRetryReplicationWait());
|
||||
}
|
||||
|
||||
private DistributedLock replicateAndFailover(final ClusterController clusterController) throws ActiveMQException, InterruptedException {
|
||||
final RegistrationFailureForwarder registrationFailureForwarder = new RegistrationFailureForwarder();
|
||||
// node locator isn't stateless and contains a live-list of candidate nodes to connect too, hence
|
||||
// it MUST be reused for each replicateLive attempt
|
||||
final LiveNodeLocator nodeLocator = createLiveNodeLocator(registrationFailureForwarder);
|
||||
clusterController.addClusterTopologyListenerForReplication(nodeLocator);
|
||||
try {
|
||||
while (true) {
|
||||
synchronized (this) {
|
||||
if (closed) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
final ReplicationFailure failure = replicateLive(clusterController, nodeLocator, registrationFailureForwarder);
|
||||
if (failure == null) {
|
||||
Thread.sleep(clusterController.getRetryIntervalForReplicatedCluster());
|
||||
continue;
|
||||
}
|
||||
if (!activeMQServer.isStarted()) {
|
||||
return null;
|
||||
}
|
||||
LOGGER.debugf("ReplicationFailure = %s", failure);
|
||||
switch (failure) {
|
||||
case VoluntaryFailOver:
|
||||
case NonVoluntaryFailover:
|
||||
// from now on we're meant to stop:
|
||||
// - due to failover
|
||||
// - due to restart/stop
|
||||
if (!stopping.compareAndSet(false, true)) {
|
||||
return null;
|
||||
}
|
||||
// no more interested into these events: handling it manually from here
|
||||
distributedManager.removeUnavailableManagerListener(this);
|
||||
final long nodeActivationSequence = activeMQServer.getNodeManager().readNodeActivationSequence();
|
||||
final String nodeId = activeMQServer.getNodeManager().getNodeId().toString();
|
||||
DistributedLock liveLockWithInSyncReplica = null;
|
||||
if (nodeActivationSequence > 0) {
|
||||
try {
|
||||
liveLockWithInSyncReplica = tryActivate(nodeId, nodeActivationSequence, distributedManager, LOGGER);
|
||||
} catch (Throwable error) {
|
||||
// no need to retry here, can just restart as backup that will handle a more resilient tryActivate
|
||||
LOGGER.warn("Errored while attempting failover", error);
|
||||
liveLockWithInSyncReplica = null;
|
||||
}
|
||||
} else {
|
||||
LOGGER.warnf("We expect local activation sequence for NodeID = %s to be > 0 on a fail-over, while is %d", nodeId, nodeActivationSequence);
|
||||
}
|
||||
assert stopping.get();
|
||||
if (liveLockWithInSyncReplica != null) {
|
||||
return liveLockWithInSyncReplica;
|
||||
}
|
||||
ActiveMQServerLogger.LOGGER.restartingAsBackupBasedOnQuorumVoteResults();
|
||||
// let's ignore the stopping flag here, we're already in control of it
|
||||
asyncRestartServer(activeMQServer, true, false);
|
||||
return null;
|
||||
case RegistrationError:
|
||||
LOGGER.error("Stopping broker because of critical registration error");
|
||||
asyncRestartServer(activeMQServer, false);
|
||||
return null;
|
||||
case AlreadyReplicating:
|
||||
// can just retry here, data should be clean and nodeLocator
|
||||
// should remove the live node that has answered this
|
||||
LOGGER.info("Live broker was already replicating: retry sync with another live");
|
||||
continue;
|
||||
case ClosedObserver:
|
||||
return null;
|
||||
case BackupNotInSync:
|
||||
LOGGER.info("Replication failure while initial sync not yet completed: restart as backup");
|
||||
asyncRestartServer(activeMQServer, true);
|
||||
return null;
|
||||
case WrongNodeId:
|
||||
LOGGER.error("Stopping broker because of wrong node ID communication from live: maybe a misbehaving live?");
|
||||
asyncRestartServer(activeMQServer, false);
|
||||
return null;
|
||||
default:
|
||||
throw new AssertionError("Unsupported failure " + failure);
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
silentExecution("Error on cluster topology listener for replication cleanup", () -> clusterController.removeClusterTopologyListenerForReplication(nodeLocator));
|
||||
}
|
||||
}
|
||||
|
||||
private ReplicationObserver replicationObserver() {
|
||||
if (policy.isTryFailback()) {
|
||||
return ReplicationObserver.failbackObserver(activeMQServer.getNodeManager(), activeMQServer.getBackupManager(), activeMQServer.getScheduledPool(), expectedNodeID);
|
||||
}
|
||||
return ReplicationObserver.failoverObserver(activeMQServer.getNodeManager(), activeMQServer.getBackupManager(), activeMQServer.getScheduledPool());
|
||||
}
|
||||
|
||||
private ReplicationFailure replicateLive(final ClusterController clusterController,
|
||||
final LiveNodeLocator liveLocator,
|
||||
final RegistrationFailureForwarder registrationFailureForwarder) throws ActiveMQException {
|
||||
try (ReplicationObserver replicationObserver = replicationObserver();
|
||||
RegistrationFailureForwarder ignored = registrationFailureForwarder.to(replicationObserver)) {
|
||||
this.replicationObserver = replicationObserver;
|
||||
clusterController.addClusterTopologyListener(replicationObserver);
|
||||
// ReplicationError notifies backup registration failures to live locator -> forwarder -> observer
|
||||
final ReplicationError replicationError = new ReplicationError(liveLocator);
|
||||
clusterController.addIncomingInterceptorForReplication(replicationError);
|
||||
try {
|
||||
final ClusterControl liveControl = tryLocateAndConnectToLive(liveLocator, clusterController);
|
||||
if (liveControl == null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
final ReplicationEndpoint replicationEndpoint = tryAuthorizeAndAsyncRegisterAsBackupToLive(liveControl, replicationObserver);
|
||||
if (replicationEndpoint == null) {
|
||||
return ReplicationFailure.RegistrationError;
|
||||
}
|
||||
this.replicationEndpoint = replicationEndpoint;
|
||||
try {
|
||||
return replicationObserver.awaitReplicationFailure();
|
||||
} finally {
|
||||
this.replicationEndpoint = null;
|
||||
ActiveMQServerImpl.stopComponent(replicationEndpoint);
|
||||
closeChannelOf(replicationEndpoint);
|
||||
}
|
||||
} finally {
|
||||
silentExecution("Error on live control close", liveControl::close);
|
||||
}
|
||||
} finally {
|
||||
silentExecution("Error on cluster topology listener cleanup", () -> clusterController.removeClusterTopologyListener(replicationObserver));
|
||||
silentExecution("Error while removing incoming interceptor for replication", () -> clusterController.removeIncomingInterceptorForReplication(replicationError));
|
||||
}
|
||||
} finally {
|
||||
this.replicationObserver = null;
|
||||
}
|
||||
}
|
||||
|
||||
private static void silentExecution(String debugErrorMessage, Runnable task) {
|
||||
try {
|
||||
task.run();
|
||||
} catch (Throwable ignore) {
|
||||
LOGGER.debug(debugErrorMessage, ignore);
|
||||
}
|
||||
}
|
||||
|
||||
private static void closeChannelOf(final ReplicationEndpoint replicationEndpoint) {
|
||||
if (replicationEndpoint == null) {
|
||||
return;
|
||||
}
|
||||
if (replicationEndpoint.getChannel() != null) {
|
||||
silentExecution("Error while closing replication endpoint channel", () -> replicationEndpoint.getChannel().close());
|
||||
replicationEndpoint.setChannel(null);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean asyncRestartServer(final ActiveMQServer server, boolean restart) {
|
||||
return asyncRestartServer(server, restart, true);
|
||||
}
|
||||
|
||||
private boolean asyncRestartServer(final ActiveMQServer server, boolean restart, boolean checkStopping) {
|
||||
if (checkStopping) {
|
||||
if (!stopping.compareAndSet(false, true)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
new Thread(() -> {
|
||||
if (server.getState() != ActiveMQServer.SERVER_STATE.STOPPED && server.getState() != ActiveMQServer.SERVER_STATE.STOPPING) {
|
||||
try {
|
||||
server.stop(!restart);
|
||||
if (restart) {
|
||||
server.start();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
if (restart) {
|
||||
ActiveMQServerLogger.LOGGER.errorRestartingBackupServer(e, server);
|
||||
} else {
|
||||
ActiveMQServerLogger.LOGGER.errorStoppingServer(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}).start();
|
||||
return true;
|
||||
}
|
||||
|
||||
private ClusterControl tryLocateAndConnectToLive(final LiveNodeLocator liveLocator,
|
||||
final ClusterController clusterController) throws ActiveMQException {
|
||||
liveLocator.locateNode();
|
||||
final Pair<TransportConfiguration, TransportConfiguration> possibleLive = liveLocator.getLiveConfiguration();
|
||||
final String nodeID = liveLocator.getNodeID();
|
||||
if (nodeID == null) {
|
||||
throw new RuntimeException("Could not establish the connection with any live");
|
||||
}
|
||||
if (!policy.isTryFailback()) {
|
||||
assert expectedNodeID == null;
|
||||
activeMQServer.getNodeManager().setNodeID(nodeID);
|
||||
} else {
|
||||
assert expectedNodeID.equals(nodeID);
|
||||
}
|
||||
if (possibleLive == null) {
|
||||
return null;
|
||||
}
|
||||
final ClusterControl liveControl = tryConnectToNodeInReplicatedCluster(clusterController, possibleLive.getA());
|
||||
if (liveControl != null) {
|
||||
return liveControl;
|
||||
}
|
||||
return tryConnectToNodeInReplicatedCluster(clusterController, possibleLive.getB());
|
||||
}
|
||||
|
||||
private static ClusterControl tryConnectToNodeInReplicatedCluster(final ClusterController clusterController,
|
||||
final TransportConfiguration tc) {
|
||||
try {
|
||||
if (tc != null) {
|
||||
return clusterController.connectToNodeInReplicatedCluster(tc);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOGGER.debug(e.getMessage(), e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close(final boolean permanently, final boolean restarting) throws Exception {
|
||||
synchronized (this) {
|
||||
closed = true;
|
||||
final ReplicationObserver replicationObserver = this.replicationObserver;
|
||||
if (replicationObserver != null) {
|
||||
replicationObserver.close();
|
||||
}
|
||||
}
|
||||
//we have to check as the server policy may have changed
|
||||
try {
|
||||
if (activeMQServer.getHAPolicy().isBackup()) {
|
||||
// To avoid a NPE cause by the stop
|
||||
final NodeManager nodeManager = activeMQServer.getNodeManager();
|
||||
|
||||
activeMQServer.interruptActivationThread(nodeManager);
|
||||
|
||||
if (nodeManager != null) {
|
||||
nodeManager.stopBackup();
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
// this one need to happen after interrupting the activation thread
|
||||
// in order to unblock distributedManager::start
|
||||
distributedManager.stop();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void preStorageClose() throws Exception {
|
||||
// TODO replication endpoint close?
|
||||
}
|
||||
|
||||
private ReplicationEndpoint tryAuthorizeAndAsyncRegisterAsBackupToLive(final ClusterControl liveControl,
|
||||
final ReplicationObserver liveObserver) {
|
||||
ReplicationEndpoint replicationEndpoint = null;
|
||||
try {
|
||||
liveControl.getSessionFactory().setReconnectAttempts(1);
|
||||
liveObserver.listenConnectionFailuresOf(liveControl.getSessionFactory());
|
||||
liveControl.authorize();
|
||||
replicationEndpoint = new ReplicationEndpoint(activeMQServer, policy.isTryFailback(), liveObserver);
|
||||
final Consumer<ReplicationEndpoint> onReplicationEndpointCreation = this.onReplicationEndpointCreation;
|
||||
if (onReplicationEndpointCreation != null) {
|
||||
onReplicationEndpointCreation.accept(replicationEndpoint);
|
||||
}
|
||||
replicationEndpoint.setExecutor(activeMQServer.getExecutorFactory().getExecutor());
|
||||
connectToReplicationEndpoint(liveControl, replicationEndpoint);
|
||||
replicationEndpoint.start();
|
||||
liveControl.announceReplicatingBackupToLive(policy.isTryFailback(), policy.getClusterName());
|
||||
return replicationEndpoint;
|
||||
} catch (Exception e) {
|
||||
ActiveMQServerLogger.LOGGER.replicationStartProblem(e);
|
||||
ActiveMQServerImpl.stopComponent(replicationEndpoint);
|
||||
closeChannelOf(replicationEndpoint);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean connectToReplicationEndpoint(final ClusterControl liveControl,
|
||||
final ReplicationEndpoint replicationEndpoint) {
|
||||
final Channel replicationChannel = liveControl.createReplicationChannel();
|
||||
replicationChannel.setHandler(replicationEndpoint);
|
||||
replicationEndpoint.setChannel(replicationChannel);
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReplicaSync() {
|
||||
// NOTE: this method is just for monitoring purposes, not suitable to perform logic!
|
||||
// During a failover this backup won't have any active liveObserver and will report `false`!!
|
||||
final ReplicationObserver liveObserver = this.replicationObserver;
|
||||
if (liveObserver == null) {
|
||||
return false;
|
||||
}
|
||||
return liveObserver.isBackupUpToDate();
|
||||
}
|
||||
|
||||
public ReplicationEndpoint getReplicationEndpoint() {
|
||||
return replicationEndpoint;
|
||||
}
|
||||
|
||||
/**
|
||||
* This must be used just for testing purposes.
|
||||
*/
|
||||
public void spyReplicationEndpointCreation(Consumer<ReplicationEndpoint> onReplicationEndpointCreation) {
|
||||
Objects.requireNonNull(onReplicationEndpointCreation);
|
||||
this.onReplicationEndpointCreation = onReplicationEndpointCreation;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,332 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.core.server.impl;
|
||||
|
||||
import javax.annotation.concurrent.GuardedBy;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.ScheduledExecutorService;
|
||||
import java.util.concurrent.ScheduledFuture;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.activemq.artemis.api.core.ActiveMQException;
|
||||
import org.apache.activemq.artemis.api.core.client.ClusterTopologyListener;
|
||||
import org.apache.activemq.artemis.api.core.client.SessionFailureListener;
|
||||
import org.apache.activemq.artemis.api.core.client.TopologyMember;
|
||||
import org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal;
|
||||
import org.apache.activemq.artemis.core.protocol.core.CoreRemotingConnection;
|
||||
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage;
|
||||
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
|
||||
import org.apache.activemq.artemis.core.server.LiveNodeLocator.BackupRegistrationListener;
|
||||
import org.apache.activemq.artemis.core.server.NodeManager;
|
||||
import org.apache.activemq.artemis.core.server.cluster.BackupManager;
|
||||
import org.jboss.logging.Logger;
|
||||
|
||||
final class ReplicationObserver implements ClusterTopologyListener, SessionFailureListener, BackupRegistrationListener, ReplicationEndpoint.ReplicationEndpointEventListener, AutoCloseable {
|
||||
|
||||
private static final Logger LOGGER = Logger.getLogger(ReplicationObserver.class);
|
||||
|
||||
public enum ReplicationFailure {
|
||||
VoluntaryFailOver, BackupNotInSync, NonVoluntaryFailover, RegistrationError, AlreadyReplicating, ClosedObserver, WrongNodeId;
|
||||
}
|
||||
|
||||
private final NodeManager nodeManager;
|
||||
private final BackupManager backupManager;
|
||||
private final ScheduledExecutorService scheduledPool;
|
||||
private final boolean failback;
|
||||
private final String expectedNodeID;
|
||||
private final CompletableFuture<ReplicationFailure> replicationFailure;
|
||||
|
||||
@GuardedBy("this")
|
||||
private ClientSessionFactoryInternal sessionFactory;
|
||||
@GuardedBy("this")
|
||||
private CoreRemotingConnection connection;
|
||||
@GuardedBy("this")
|
||||
private ScheduledFuture<?> forcedFailover;
|
||||
|
||||
private volatile String liveID;
|
||||
private volatile boolean backupUpToDate;
|
||||
private volatile boolean closed;
|
||||
|
||||
/**
|
||||
* This is a safety net in case the live sends the first {@link ReplicationLiveIsStoppingMessage}
|
||||
* with code {@link org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage.LiveStopping#STOP_CALLED} and crashes before sending the second with
|
||||
* {@link org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage.LiveStopping#FAIL_OVER}.
|
||||
* <p>
|
||||
* If the second message does come within this dead line, we fail over anyway.
|
||||
*/
|
||||
public static final int WAIT_TIME_AFTER_FIRST_LIVE_STOPPING_MSG = 60;
|
||||
|
||||
private ReplicationObserver(final NodeManager nodeManager,
|
||||
final BackupManager backupManager,
|
||||
final ScheduledExecutorService scheduledPool,
|
||||
final boolean failback,
|
||||
final String expectedNodeID) {
|
||||
this.nodeManager = nodeManager;
|
||||
this.backupManager = backupManager;
|
||||
this.scheduledPool = scheduledPool;
|
||||
this.failback = failback;
|
||||
this.expectedNodeID = expectedNodeID;
|
||||
this.replicationFailure = new CompletableFuture<>();
|
||||
|
||||
this.sessionFactory = null;
|
||||
this.connection = null;
|
||||
this.forcedFailover = null;
|
||||
|
||||
this.liveID = null;
|
||||
this.backupUpToDate = false;
|
||||
this.closed = false;
|
||||
}
|
||||
|
||||
public static ReplicationObserver failbackObserver(final NodeManager nodeManager,
|
||||
final BackupManager backupManager,
|
||||
final ScheduledExecutorService scheduledPool,
|
||||
final String expectedNodeID) {
|
||||
Objects.requireNonNull(expectedNodeID);
|
||||
return new ReplicationObserver(nodeManager, backupManager, scheduledPool, true, expectedNodeID);
|
||||
}
|
||||
|
||||
public static ReplicationObserver failoverObserver(final NodeManager nodeManager,
|
||||
final BackupManager backupManager,
|
||||
final ScheduledExecutorService scheduledPool) {
|
||||
return new ReplicationObserver(nodeManager, backupManager, scheduledPool, false, null);
|
||||
}
|
||||
|
||||
private void onLiveDown(boolean voluntaryFailover) {
|
||||
if (closed || replicationFailure.isDone()) {
|
||||
return;
|
||||
}
|
||||
synchronized (this) {
|
||||
if (closed || replicationFailure.isDone()) {
|
||||
return;
|
||||
}
|
||||
stopForcedFailoverAfterDelay();
|
||||
unlistenConnectionFailures();
|
||||
if (!isRemoteBackupUpToDate()) {
|
||||
replicationFailure.complete(ReplicationFailure.BackupNotInSync);
|
||||
} else if (voluntaryFailover) {
|
||||
replicationFailure.complete(ReplicationFailure.VoluntaryFailOver);
|
||||
} else {
|
||||
replicationFailure.complete(ReplicationFailure.NonVoluntaryFailover);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void nodeDown(long eventUID, String nodeID) {
|
||||
// ignore it during a failback:
|
||||
// a failing slave close all connections but the one used for replication
|
||||
// triggering a nodeDown before the restarted master receive a STOP_CALLED from it.
|
||||
// This can make master to fire a useless quorum vote during a normal failback.
|
||||
if (failback) {
|
||||
return;
|
||||
}
|
||||
if (nodeID.equals(liveID)) {
|
||||
onLiveDown(false);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void nodeUP(TopologyMember member, boolean last) {
|
||||
}
|
||||
|
||||
/**
|
||||
* if the connection to our replicated live goes down then decide on an action
|
||||
*/
|
||||
@Override
|
||||
public void connectionFailed(ActiveMQException exception, boolean failedOver) {
|
||||
onLiveDown(false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void connectionFailed(final ActiveMQException me, boolean failedOver, String scaleDownTargetNodeID) {
|
||||
connectionFailed(me, failedOver);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void beforeReconnect(ActiveMQException exception) {
|
||||
//noop
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if (closed) {
|
||||
return;
|
||||
}
|
||||
synchronized (this) {
|
||||
if (closed) {
|
||||
return;
|
||||
}
|
||||
unlistenConnectionFailures();
|
||||
closed = true;
|
||||
replicationFailure.complete(ReplicationFailure.ClosedObserver);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param liveSessionFactory the session factory used to connect to the live server
|
||||
*/
|
||||
public synchronized void listenConnectionFailuresOf(final ClientSessionFactoryInternal liveSessionFactory) {
|
||||
if (closed) {
|
||||
throw new IllegalStateException("the observer is closed: cannot listen to any failures");
|
||||
}
|
||||
if (sessionFactory != null || connection != null) {
|
||||
throw new IllegalStateException("this observer is already listening to other session factory failures");
|
||||
}
|
||||
this.sessionFactory = liveSessionFactory;
|
||||
//belts and braces, there are circumstances where the connection listener doesn't get called but the session does.
|
||||
this.sessionFactory.addFailureListener(this);
|
||||
connection = (CoreRemotingConnection) liveSessionFactory.getConnection();
|
||||
connection.addFailureListener(this);
|
||||
}
|
||||
|
||||
public synchronized void unlistenConnectionFailures() {
|
||||
if (connection != null) {
|
||||
connection.removeFailureListener(this);
|
||||
connection = null;
|
||||
}
|
||||
if (sessionFactory != null) {
|
||||
sessionFactory.removeFailureListener(this);
|
||||
sessionFactory = null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onBackupRegistrationFailed(boolean alreadyReplicating) {
|
||||
if (closed || replicationFailure.isDone()) {
|
||||
return;
|
||||
}
|
||||
synchronized (this) {
|
||||
if (closed || replicationFailure.isDone()) {
|
||||
return;
|
||||
}
|
||||
stopForcedFailoverAfterDelay();
|
||||
unlistenConnectionFailures();
|
||||
replicationFailure.complete(alreadyReplicating ? ReplicationFailure.AlreadyReplicating : ReplicationFailure.RegistrationError);
|
||||
}
|
||||
}
|
||||
|
||||
public ReplicationFailure awaitReplicationFailure() {
|
||||
try {
|
||||
return replicationFailure.get();
|
||||
} catch (Throwable e) {
|
||||
return ReplicationFailure.ClosedObserver;
|
||||
}
|
||||
}
|
||||
|
||||
private synchronized void scheduleForcedFailoverAfterDelay() {
|
||||
if (forcedFailover != null) {
|
||||
return;
|
||||
}
|
||||
forcedFailover = scheduledPool.schedule(() -> onLiveDown(false), WAIT_TIME_AFTER_FIRST_LIVE_STOPPING_MSG, TimeUnit.SECONDS);
|
||||
}
|
||||
|
||||
private synchronized void stopForcedFailoverAfterDelay() {
|
||||
if (forcedFailover == null) {
|
||||
return;
|
||||
}
|
||||
forcedFailover.cancel(false);
|
||||
forcedFailover = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onRemoteBackupUpToDate() {
|
||||
if (backupUpToDate || closed || replicationFailure.isDone()) {
|
||||
return;
|
||||
}
|
||||
synchronized (this) {
|
||||
if (backupUpToDate || closed || replicationFailure.isDone()) {
|
||||
return;
|
||||
}
|
||||
assert liveID != null;
|
||||
backupManager.announceBackup();
|
||||
backupUpToDate = true;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isBackupUpToDate() {
|
||||
return backupUpToDate;
|
||||
}
|
||||
|
||||
public String getLiveID() {
|
||||
return liveID;
|
||||
}
|
||||
|
||||
private boolean validateNodeId(String nodeID) {
|
||||
if (nodeID == null) {
|
||||
return false;
|
||||
}
|
||||
final String existingNodeId = this.liveID;
|
||||
if (existingNodeId == null) {
|
||||
if (!failback) {
|
||||
return true;
|
||||
}
|
||||
return nodeID.equals(expectedNodeID);
|
||||
}
|
||||
return existingNodeId.equals(nodeID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onLiveNodeId(String nodeId) {
|
||||
if (closed || replicationFailure.isDone()) {
|
||||
return;
|
||||
}
|
||||
final String existingNodeId = this.liveID;
|
||||
if (existingNodeId != null && existingNodeId.equals(nodeId)) {
|
||||
return;
|
||||
}
|
||||
synchronized (this) {
|
||||
if (closed || replicationFailure.isDone()) {
|
||||
return;
|
||||
}
|
||||
if (!validateNodeId(nodeId)) {
|
||||
stopForcedFailoverAfterDelay();
|
||||
unlistenConnectionFailures();
|
||||
replicationFailure.complete(ReplicationFailure.WrongNodeId);
|
||||
} else if (liveID == null) {
|
||||
liveID = nodeId;
|
||||
nodeManager.setNodeID(nodeId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isRemoteBackupUpToDate() {
|
||||
return backupUpToDate;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onLiveStopping(ReplicationLiveIsStoppingMessage.LiveStopping finalMessage) {
|
||||
if (closed || replicationFailure.isDone()) {
|
||||
return;
|
||||
}
|
||||
synchronized (this) {
|
||||
if (closed || replicationFailure.isDone()) {
|
||||
return;
|
||||
}
|
||||
switch (finalMessage) {
|
||||
case STOP_CALLED:
|
||||
scheduleForcedFailoverAfterDelay();
|
||||
break;
|
||||
case FAIL_OVER:
|
||||
onLiveDown(true);
|
||||
break;
|
||||
default:
|
||||
LOGGER.errorf("unsupported LiveStopping type: %s", finalMessage);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,430 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.core.server.impl;
|
||||
|
||||
import javax.annotation.concurrent.GuardedBy;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.activemq.artemis.api.core.ActiveMQAlreadyReplicatingException;
|
||||
import org.apache.activemq.artemis.api.core.ActiveMQException;
|
||||
import org.apache.activemq.artemis.api.core.ActiveMQIllegalStateException;
|
||||
import org.apache.activemq.artemis.api.core.Pair;
|
||||
import org.apache.activemq.artemis.api.core.TransportConfiguration;
|
||||
import org.apache.activemq.artemis.core.persistence.StorageManager;
|
||||
import org.apache.activemq.artemis.core.protocol.core.Channel;
|
||||
import org.apache.activemq.artemis.core.protocol.core.ChannelHandler;
|
||||
import org.apache.activemq.artemis.core.protocol.core.CoreRemotingConnection;
|
||||
import org.apache.activemq.artemis.core.protocol.core.impl.PacketImpl;
|
||||
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.BackupRegistrationMessage;
|
||||
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.BackupReplicationStartFailedMessage;
|
||||
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage;
|
||||
import org.apache.activemq.artemis.core.remoting.CloseListener;
|
||||
import org.apache.activemq.artemis.core.remoting.FailureListener;
|
||||
import org.apache.activemq.artemis.core.remoting.server.RemotingService;
|
||||
import org.apache.activemq.artemis.core.replication.ReplicationManager;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
|
||||
import org.apache.activemq.artemis.core.server.NodeManager;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ClusterConnection;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
|
||||
import org.apache.activemq.artemis.quorum.DistributedLock;
|
||||
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
|
||||
import org.apache.activemq.artemis.quorum.UnavailableStateException;
|
||||
import org.apache.activemq.artemis.spi.core.remoting.Acceptor;
|
||||
import org.jboss.logging.Logger;
|
||||
|
||||
import static org.apache.activemq.artemis.core.server.ActiveMQServer.SERVER_STATE.STARTED;
|
||||
import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.awaitNextCommittedActivationSequence;
|
||||
import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.ensureSequentialAccessToNodeData;
|
||||
import static org.apache.activemq.artemis.core.server.impl.quorum.ActivationSequenceStateMachine.tryActivate;
|
||||
|
||||
/**
|
||||
* {@link #run()} is executed only by a natural-born primary, on its first start.
|
||||
* During a failover or a failback {@link #run()} is not used: only {@link #getActivationChannelHandler(Channel, Acceptor)} is.
|
||||
*/
|
||||
public class ReplicationPrimaryActivation extends LiveActivation implements DistributedLock.UnavailableLockListener {
|
||||
|
||||
private static final Logger LOGGER = Logger.getLogger(ReplicationPrimaryActivation.class);
|
||||
// This is the time we expect a replica to become a live from the quorum pov
|
||||
// ie time to execute tryActivate and ensureSequentialAccessToNodeData
|
||||
private static final long FAILBACK_TIMEOUT_MILLIS = 4_000;
|
||||
|
||||
private final ReplicationPrimaryPolicy policy;
|
||||
|
||||
private final ActiveMQServerImpl activeMQServer;
|
||||
|
||||
@GuardedBy("replicationLock")
|
||||
private ReplicationManager replicationManager;
|
||||
|
||||
private final Object replicationLock;
|
||||
|
||||
private final DistributedPrimitiveManager distributedManager;
|
||||
|
||||
private final AtomicBoolean stoppingServer;
|
||||
|
||||
/**
 * Creates the activation of a primary broker.
 *
 * @param activeMQServer     the server this activation belongs to
 * @param distributedManager the manager of the distributed primitives used to coordinate the activation
 * @param policy             the primary replication policy
 */
public ReplicationPrimaryActivation(final ActiveMQServerImpl activeMQServer,
                                    final DistributedPrimitiveManager distributedManager,
                                    final ReplicationPrimaryPolicy policy) {
   // collaborators received from the caller
   this.activeMQServer = activeMQServer;
   this.distributedManager = distributedManager;
   this.policy = policy;
   // state owned by this activation
   this.replicationLock = new Object();
   this.stoppingServer = new AtomicBoolean();
}
|
||||
|
||||
/**
|
||||
* used for testing purposes.
|
||||
*/
|
||||
public DistributedPrimitiveManager getDistributedManager() {
|
||||
return distributedManager;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void freezeConnections(RemotingService remotingService) {
|
||||
final ReplicationManager replicationManager = getReplicationManager();
|
||||
|
||||
if (remotingService != null && replicationManager != null) {
|
||||
remotingService.freeze(null, replicationManager.getBackupTransportConnection());
|
||||
} else if (remotingService != null) {
|
||||
remotingService.freeze(null, null);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
// we have a common nodeId that we can share and coordinate with between peers
|
||||
if (policy.getCoordinationId() != null) {
|
||||
LOGGER.infof("Applying shared peer NodeID=%s to enable coordinated live activation", policy.getCoordinationId());
|
||||
// REVISIT: this is quite clunky, also in backup activation, we just need new nodeID persisted!
|
||||
activeMQServer.resetNodeManager();
|
||||
activeMQServer.getNodeManager().start();
|
||||
activeMQServer.getNodeManager().setNodeID(policy.getCoordinationId());
|
||||
activeMQServer.getNodeManager().stopBackup();
|
||||
}
|
||||
final long nodeActivationSequence = activeMQServer.getNodeManager().readNodeActivationSequence();
|
||||
final String nodeId = activeMQServer.getNodeManager().readNodeId().toString();
|
||||
DistributedLock liveLock;
|
||||
while (true) {
|
||||
distributedManager.start();
|
||||
try {
|
||||
liveLock = tryActivate(nodeId, nodeActivationSequence, distributedManager, LOGGER);
|
||||
break;
|
||||
} catch (UnavailableStateException canRecoverEx) {
|
||||
distributedManager.stop();
|
||||
}
|
||||
}
|
||||
if (liveLock == null) {
|
||||
distributedManager.stop();
|
||||
LOGGER.infof("This broker cannot become a live server with NodeID = %s: restarting as backup", nodeId);
|
||||
activeMQServer.setHAPolicy(policy.getBackupPolicy());
|
||||
return;
|
||||
}
|
||||
|
||||
ensureSequentialAccessToNodeData(activeMQServer, distributedManager, LOGGER);
|
||||
|
||||
activeMQServer.initialisePart1(false);
|
||||
|
||||
activeMQServer.initialisePart2(false);
|
||||
|
||||
// must be registered before checking the caller
|
||||
liveLock.addListener(this);
|
||||
|
||||
// This control is placed here because initialisePart2 is going to load the journal that
|
||||
// could pause the JVM for enough time to lose lock ownership
|
||||
if (!liveLock.isHeldByCaller()) {
|
||||
throw new IllegalStateException("This broker isn't live anymore, probably due to application pauses eg GC, OS etc: failing now");
|
||||
}
|
||||
|
||||
activeMQServer.completeActivation(true);
|
||||
|
||||
if (activeMQServer.getIdentity() != null) {
|
||||
ActiveMQServerLogger.LOGGER.serverIsLive(activeMQServer.getIdentity());
|
||||
} else {
|
||||
ActiveMQServerLogger.LOGGER.serverIsLive();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// async stop it, we don't need to await this to complete
|
||||
distributedManager.stop();
|
||||
ActiveMQServerLogger.LOGGER.initializationError(e);
|
||||
activeMQServer.callActivationFailureListeners(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ChannelHandler getActivationChannelHandler(final Channel channel, final Acceptor acceptorUsed) {
|
||||
if (stoppingServer.get()) {
|
||||
return null;
|
||||
}
|
||||
return packet -> {
|
||||
if (packet.getType() == PacketImpl.BACKUP_REGISTRATION) {
|
||||
onBackupRegistration(channel, acceptorUsed, (BackupRegistrationMessage) packet);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private void onBackupRegistration(final Channel channel,
|
||||
final Acceptor acceptorUsed,
|
||||
final BackupRegistrationMessage msg) {
|
||||
try {
|
||||
startAsyncReplication(channel.getConnection(), acceptorUsed.getClusterConnection(), msg.getConnector(), msg.isFailBackRequest());
|
||||
} catch (ActiveMQAlreadyReplicatingException are) {
|
||||
channel.send(new BackupReplicationStartFailedMessage(BackupReplicationStartFailedMessage.BackupRegistrationProblem.ALREADY_REPLICATING));
|
||||
} catch (ActiveMQException e) {
|
||||
LOGGER.debug("Failed to process backup registration packet", e);
|
||||
channel.send(new BackupReplicationStartFailedMessage(BackupReplicationStartFailedMessage.BackupRegistrationProblem.EXCEPTION));
|
||||
}
|
||||
}
|
||||
|
||||
private void startAsyncReplication(final CoreRemotingConnection remotingConnection,
|
||||
final ClusterConnection clusterConnection,
|
||||
final TransportConfiguration backupTransport,
|
||||
final boolean isFailBackRequest) throws ActiveMQException {
|
||||
synchronized (replicationLock) {
|
||||
if (replicationManager != null) {
|
||||
throw new ActiveMQAlreadyReplicatingException();
|
||||
}
|
||||
if (!activeMQServer.isStarted()) {
|
||||
throw new ActiveMQIllegalStateException();
|
||||
}
|
||||
final ReplicationFailureListener listener = new ReplicationFailureListener();
|
||||
remotingConnection.addCloseListener(listener);
|
||||
remotingConnection.addFailureListener(listener);
|
||||
final ReplicationManager replicationManager = new ReplicationManager(activeMQServer, remotingConnection, clusterConnection.getCallTimeout(), policy.getInitialReplicationSyncTimeout(), activeMQServer.getIOExecutorFactory());
|
||||
this.replicationManager = replicationManager;
|
||||
replicationManager.start();
|
||||
final Thread replicatingThread = new Thread(() -> replicate(replicationManager, clusterConnection, isFailBackRequest, backupTransport));
|
||||
replicatingThread.setName("async-replication-thread");
|
||||
replicatingThread.start();
|
||||
}
|
||||
}
|
||||
|
||||
private void replicate(final ReplicationManager replicationManager,
|
||||
final ClusterConnection clusterConnection,
|
||||
final boolean isFailBackRequest,
|
||||
final TransportConfiguration backupTransport) {
|
||||
try {
|
||||
final String nodeID = activeMQServer.getNodeID().toString();
|
||||
activeMQServer.getStorageManager().startReplication(replicationManager, activeMQServer.getPagingManager(), nodeID, isFailBackRequest && policy.isAllowAutoFailBack(), policy.getInitialReplicationSyncTimeout());
|
||||
|
||||
clusterConnection.nodeAnnounced(System.currentTimeMillis(), nodeID, policy.getGroupName(), policy.getScaleDownGroupName(), new Pair<>(null, backupTransport), true);
|
||||
|
||||
if (isFailBackRequest && policy.isAllowAutoFailBack()) {
|
||||
awaitBackupAnnouncementOnFailbackRequest(clusterConnection);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
if (activeMQServer.getState() == STARTED) {
|
||||
/*
|
||||
* The reasoning here is that the exception was either caused by (1) the
|
||||
* (interaction with) the backup, or (2) by an IO Error at the storage. If (1), we
|
||||
* can swallow the exception and ignore the replication request. If (2) the live
|
||||
* will crash shortly.
|
||||
*/
|
||||
ActiveMQServerLogger.LOGGER.errorStartingReplication(e);
|
||||
}
|
||||
try {
|
||||
ActiveMQServerImpl.stopComponent(replicationManager);
|
||||
} catch (Exception amqe) {
|
||||
ActiveMQServerLogger.LOGGER.errorStoppingReplication(amqe);
|
||||
} finally {
|
||||
synchronized (replicationLock) {
|
||||
if (this.replicationManager == replicationManager) {
|
||||
this.replicationManager = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This is handling awaiting backup announcement before trying to failover.
|
||||
* This broker is a backup broker, acting as a live and ready to restart as a backup
|
||||
*/
|
||||
private void awaitBackupAnnouncementOnFailbackRequest(ClusterConnection clusterConnection) throws Exception {
|
||||
final String nodeID = activeMQServer.getNodeID().toString();
|
||||
final BackupTopologyListener topologyListener = new BackupTopologyListener(nodeID, clusterConnection.getConnector());
|
||||
clusterConnection.addClusterTopologyListener(topologyListener);
|
||||
try {
|
||||
if (topologyListener.waitForBackup()) {
|
||||
restartAsBackupAfterFailback();
|
||||
} else {
|
||||
ActiveMQServerLogger.LOGGER.failbackMissedBackupAnnouncement();
|
||||
}
|
||||
} finally {
|
||||
clusterConnection.removeClusterTopologyListener(topologyListener);
|
||||
}
|
||||
}
|
||||
|
||||
private void restartAsBackupAfterFailback() throws Exception {
|
||||
if (stoppingServer.get()) {
|
||||
return;
|
||||
}
|
||||
final String coordinatedLockAndNodeId;
|
||||
final long inSyncReplicaActivation;
|
||||
synchronized (replicationLock) {
|
||||
if (stoppingServer.get()) {
|
||||
return;
|
||||
}
|
||||
final ReplicationManager replicationManager = this.replicationManager;
|
||||
if (replicationManager == null) {
|
||||
LOGGER.warnf("Failback interrupted");
|
||||
// we got a disconnection from the replica *before* stopping acceptors: better not failback!
|
||||
return;
|
||||
}
|
||||
// IMPORTANT: this is going to save server::fail to issue a replica connection failure (with failed == false)
|
||||
// because onReplicationConnectionClose fail-fast on stopping == true.
|
||||
if (!stoppingServer.compareAndSet(false, true)) {
|
||||
LOGGER.infof("Failback interrupted: server is already stopping");
|
||||
return;
|
||||
}
|
||||
coordinatedLockAndNodeId = activeMQServer.getNodeManager().getNodeId().toString();
|
||||
inSyncReplicaActivation = activeMQServer.getNodeManager().getNodeActivationSequence();
|
||||
// none can notice a concurrent drop of replica connection here: awaitNextCommittedActivationSequence defensively
|
||||
// wait FAILBACK_TIMEOUT_MILLIS, proceed as backup and compete to become live again
|
||||
activeMQServer.fail(true);
|
||||
}
|
||||
try {
|
||||
distributedManager.start();
|
||||
if (!awaitNextCommittedActivationSequence(distributedManager, coordinatedLockAndNodeId, inSyncReplicaActivation, FAILBACK_TIMEOUT_MILLIS, LOGGER)) {
|
||||
LOGGER.warnf("Timed out waiting for failback server activation with NodeID = %s: and sequence > %d: after %dms",
|
||||
coordinatedLockAndNodeId, inSyncReplicaActivation, FAILBACK_TIMEOUT_MILLIS);
|
||||
}
|
||||
} catch (UnavailableStateException ignored) {
|
||||
LOGGER.debug("Unavailable distributed manager while awaiting failback activation sequence: ignored", ignored);
|
||||
} finally {
|
||||
distributedManager.stop();
|
||||
}
|
||||
ActiveMQServerLogger.LOGGER.restartingReplicatedBackupAfterFailback();
|
||||
activeMQServer.setHAPolicy(policy.getBackupPolicy());
|
||||
activeMQServer.start();
|
||||
}
|
||||
|
||||
private void asyncStopServer() {
|
||||
if (stoppingServer.get()) {
|
||||
return;
|
||||
}
|
||||
if (stoppingServer.compareAndSet(false, true)) {
|
||||
new Thread(() -> {
|
||||
try {
|
||||
activeMQServer.stop();
|
||||
} catch (Exception e) {
|
||||
ActiveMQServerLogger.LOGGER.errorRestartingBackupServer(e, activeMQServer);
|
||||
}
|
||||
}).start();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onUnavailableLockEvent() {
|
||||
LOGGER.error("Quorum UNAVAILABLE: async stopping broker.");
|
||||
asyncStopServer();
|
||||
}
|
||||
|
||||
private final class ReplicationFailureListener implements FailureListener, CloseListener {
|
||||
|
||||
@Override
|
||||
public void connectionFailed(ActiveMQException exception, boolean failedOver) {
|
||||
onReplicationConnectionClose();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void connectionFailed(final ActiveMQException me, boolean failedOver, String scaleDownTargetNodeID) {
|
||||
connectionFailed(me, failedOver);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void connectionClosed() {
|
||||
onReplicationConnectionClose();
|
||||
}
|
||||
}
|
||||
|
||||
private void onReplicationConnectionClose() {
|
||||
ExecutorService executorService = activeMQServer.getThreadPool();
|
||||
if (executorService != null) {
|
||||
if (stoppingServer.get()) {
|
||||
return;
|
||||
}
|
||||
executorService.execute(() -> {
|
||||
synchronized (replicationLock) {
|
||||
if (replicationManager == null) {
|
||||
return;
|
||||
}
|
||||
// we increment only if we are staying alive
|
||||
if (!stoppingServer.get() && STARTED.equals(activeMQServer.getState())) {
|
||||
try {
|
||||
ensureSequentialAccessToNodeData(activeMQServer, distributedManager, LOGGER);
|
||||
} catch (Throwable fatal) {
|
||||
LOGGER.errorf(fatal, "Unexpected exception: %s on attempted activation sequence increment; stopping server async", fatal.getLocalizedMessage());
|
||||
asyncStopServer();
|
||||
}
|
||||
}
|
||||
// this is going to stop the replication manager
|
||||
final StorageManager storageManager = activeMQServer.getStorageManager();
|
||||
if (storageManager != null) {
|
||||
storageManager.stopReplication();
|
||||
}
|
||||
replicationManager = null;
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close(boolean permanently, boolean restarting) throws Exception {
|
||||
synchronized (replicationLock) {
|
||||
replicationManager = null;
|
||||
}
|
||||
distributedManager.stop();
|
||||
// To avoid a NPE cause by the stop
|
||||
final NodeManager nodeManager = activeMQServer.getNodeManager();
|
||||
if (nodeManager != null) {
|
||||
if (permanently) {
|
||||
nodeManager.crashLiveServer();
|
||||
} else {
|
||||
nodeManager.pauseLiveServer();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void sendLiveIsStopping() {
|
||||
final ReplicationManager replicationManager = getReplicationManager();
|
||||
if (replicationManager == null) {
|
||||
return;
|
||||
}
|
||||
replicationManager.sendLiveIsStopping(ReplicationLiveIsStoppingMessage.LiveStopping.STOP_CALLED);
|
||||
// this pool gets a 'hard' shutdown, no need to manage the Future of this Runnable.
|
||||
activeMQServer.getScheduledPool().schedule(replicationManager::clearReplicationTokens, 30, TimeUnit.SECONDS);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReplicationManager getReplicationManager() {
|
||||
synchronized (replicationLock) {
|
||||
return replicationManager;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReplicaSync() {
|
||||
final ReplicationManager replicationManager = getReplicationManager();
|
||||
if (replicationManager == null) {
|
||||
return false;
|
||||
}
|
||||
return !replicationManager.isSynchronizing();
|
||||
}
|
||||
}
|
|
@ -32,6 +32,7 @@ import org.apache.activemq.artemis.core.postoffice.PostOffice;
|
|||
import org.apache.activemq.artemis.core.protocol.core.Channel;
|
||||
import org.apache.activemq.artemis.core.protocol.core.impl.wireformat.ReplicationLiveIsStoppingMessage;
|
||||
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
|
||||
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint.ReplicationEndpointEventListener;
|
||||
import org.apache.activemq.artemis.core.server.ActivationParams;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQMessageBundle;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServer;
|
||||
|
@ -54,7 +55,7 @@ import static org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothi
|
|||
import static org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum.BACKUP_ACTIVATION.FAIL_OVER;
|
||||
import static org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum.BACKUP_ACTIVATION.STOP;
|
||||
|
||||
public final class SharedNothingBackupActivation extends Activation {
|
||||
public final class SharedNothingBackupActivation extends Activation implements ReplicationEndpointEventListener {
|
||||
|
||||
private static final Logger logger = Logger.getLogger(SharedNothingBackupActivation.class);
|
||||
|
||||
|
@ -96,7 +97,7 @@ public final class SharedNothingBackupActivation extends Activation {
|
|||
assert replicationEndpoint == null;
|
||||
activeMQServer.resetNodeManager();
|
||||
backupUpToDate = false;
|
||||
replicationEndpoint = new ReplicationEndpoint(activeMQServer, ioCriticalErrorListener, attemptFailBack, this);
|
||||
replicationEndpoint = new ReplicationEndpoint(activeMQServer, attemptFailBack, this);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -156,9 +157,6 @@ public final class SharedNothingBackupActivation extends Activation {
|
|||
logger.debug("Starting backup manager");
|
||||
activeMQServer.getBackupManager().start();
|
||||
|
||||
logger.debug("Set backup Quorum");
|
||||
replicationEndpoint.setBackupQuorum(backupQuorum);
|
||||
|
||||
replicationEndpoint.setExecutor(activeMQServer.getExecutorFactory().getExecutor());
|
||||
EndpointConnector endpointConnector = new EndpointConnector();
|
||||
|
||||
|
@ -461,7 +459,13 @@ public final class SharedNothingBackupActivation extends Activation {
|
|||
return backupUpToDate;
|
||||
}
|
||||
|
||||
public void setRemoteBackupUpToDate() {
|
||||
@Override
|
||||
public void onLiveNodeId(String nodeId) {
|
||||
backupQuorum.liveIDSet(nodeId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onRemoteBackupUpToDate() {
|
||||
activeMQServer.getBackupManager().announceBackup();
|
||||
backupUpToDate = true;
|
||||
backupSyncLatch.countDown();
|
||||
|
@ -470,7 +474,8 @@ public final class SharedNothingBackupActivation extends Activation {
|
|||
/**
|
||||
* @throws ActiveMQException
|
||||
*/
|
||||
public void remoteFailOver(ReplicationLiveIsStoppingMessage.LiveStopping finalMessage) throws ActiveMQException {
|
||||
@Override
|
||||
public void onLiveStopping(ReplicationLiveIsStoppingMessage.LiveStopping finalMessage) throws ActiveMQException {
|
||||
if (logger.isTraceEnabled()) {
|
||||
logger.trace("Remote fail-over, got message=" + finalMessage + ", backupUpToDate=" +
|
||||
backupUpToDate);
|
||||
|
@ -526,4 +531,9 @@ public final class SharedNothingBackupActivation extends Activation {
|
|||
return replicationEndpoint;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReplicaSync() {
|
||||
return isRemoteBackupUpToDate();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -462,4 +462,13 @@ public class SharedNothingLiveActivation extends LiveActivation {
|
|||
private TransportConfiguration[] connectorNameListToArray(final List<String> connectorNames) {
|
||||
return activeMQServer.getConfiguration().getTransportConfigurations(connectorNames);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReplicaSync() {
|
||||
final ReplicationManager replicationManager = getReplicationManager();
|
||||
if (replicationManager == null) {
|
||||
return false;
|
||||
}
|
||||
return !replicationManager.isSynchronizing();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,312 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.activemq.artemis.core.server.impl.quorum;
|
||||
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
import org.apache.activemq.artemis.api.core.ActiveMQException;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServer;
|
||||
import org.apache.activemq.artemis.core.server.NodeManager;
|
||||
import org.apache.activemq.artemis.quorum.DistributedLock;
|
||||
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
|
||||
import org.apache.activemq.artemis.quorum.MutableLong;
|
||||
import org.apache.activemq.artemis.quorum.UnavailableStateException;
|
||||
import org.jboss.logging.Logger;
|
||||
|
||||
/**
|
||||
* This class contains the activation sequence logic of the pluggable quorum vote:
|
||||
* it should be used by {@link org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation}
|
||||
* and {@link org.apache.activemq.artemis.core.server.impl.ReplicationPrimaryActivation} to coordinate
|
||||
* for replication.
|
||||
*/
|
||||
public final class ActivationSequenceStateMachine {
|
||||
|
||||
private static final long CHECK_ACTIVATION_SEQUENCE_WAIT_MILLIS = 200;
|
||||
private static final long CHECK_REPAIRED_ACTIVATION_SEQUENCE_WAIT_MILLIS = 2000;
|
||||
private static final long LIVE_LOCK_ACQUIRE_TIMEOUT_MILLIS = 2000;
|
||||
|
||||
private ActivationSequenceStateMachine() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* It loops if the data of the broker is still valuable, but cannot become live.
|
||||
* It loops (temporarly) if data is in sync or can self-heal, but cannot yet acquire the live lock.
|
||||
* <p>
|
||||
* It stops loop and return:
|
||||
* <p><ul>
|
||||
* <li>{@code null}: if data is stale (and there are no rights to become live)
|
||||
* <li>{@code !=null}: if data is in sync and the {@link DistributedLock} is correctly acquired
|
||||
* </ul><p>
|
||||
* <p>
|
||||
* After successfully returning from this method ie not null return value, a broker should use
|
||||
* {@link #ensureSequentialAccessToNodeData(ActiveMQServer, DistributedPrimitiveManager, Logger)} to complete
|
||||
* the activation and guarantee the initial not-replicated ownership of data.
|
||||
*/
|
||||
public static DistributedLock tryActivate(final String nodeId,
|
||||
final long nodeActivationSequence,
|
||||
final DistributedPrimitiveManager distributedManager,
|
||||
final Logger logger) throws InterruptedException, ExecutionException, TimeoutException, UnavailableStateException {
|
||||
final DistributedLock activationLock = distributedManager.getDistributedLock(nodeId);
|
||||
try (MutableLong coordinatedNodeSequence = distributedManager.getMutableLong(nodeId)) {
|
||||
while (true) {
|
||||
// dirty read is sufficient to know if we are *not* an in sync replica
|
||||
// typically the lock owner will increment to signal our data is stale and we are happy without any
|
||||
// further coordination at this point
|
||||
switch (validateActivationSequence(coordinatedNodeSequence, activationLock, nodeId, nodeActivationSequence, logger)) {
|
||||
|
||||
case Stale:
|
||||
activationLock.close();
|
||||
return null;
|
||||
case SelfRepair:
|
||||
case InSync:
|
||||
break;
|
||||
case MaybeInSync:
|
||||
if (activationLock.tryLock()) {
|
||||
// BAD: where's the broker that should commit it?
|
||||
activationLock.unlock();
|
||||
logger.warnf("Cannot assume live role for NodeID = %s: claimed activation sequence need to be repaired",
|
||||
nodeId);
|
||||
TimeUnit.MILLISECONDS.sleep(CHECK_REPAIRED_ACTIVATION_SEQUENCE_WAIT_MILLIS);
|
||||
continue;
|
||||
}
|
||||
// quick path while data is still valuable: wait until something change (commit/repair)
|
||||
TimeUnit.MILLISECONDS.sleep(CHECK_ACTIVATION_SEQUENCE_WAIT_MILLIS);
|
||||
continue;
|
||||
}
|
||||
// SelfRepair, InSync
|
||||
if (!activationLock.tryLock(LIVE_LOCK_ACQUIRE_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS)) {
|
||||
logger.debugf("Candidate for Node ID = %s, with local activation sequence: %d, cannot acquire live lock within %dms; retrying",
|
||||
nodeId, nodeActivationSequence, LIVE_LOCK_ACQUIRE_TIMEOUT_MILLIS);
|
||||
continue;
|
||||
}
|
||||
switch (validateActivationSequence(coordinatedNodeSequence, activationLock, nodeId, nodeActivationSequence, logger)) {
|
||||
|
||||
case Stale:
|
||||
activationLock.close();
|
||||
return null;
|
||||
case SelfRepair:
|
||||
// Self-repair sequence ie we were the only one with the most up to date data.
|
||||
// NOTE: We cannot move the sequence now, let's delay it on ensureSequentialAccessToNodeData
|
||||
logger.infof("Assuming live role for NodeID = %s: local activation sequence %d matches claimed coordinated activation sequence %d. Repairing sequence", nodeId, nodeActivationSequence, nodeActivationSequence);
|
||||
return activationLock;
|
||||
case InSync:
|
||||
// we are an in_sync_replica, good to go live as UNREPLICATED
|
||||
logger.infof("Assuming live role for NodeID = %s, local activation sequence %d matches current coordinated activation sequence %d", nodeId, nodeActivationSequence, nodeActivationSequence);
|
||||
return activationLock;
|
||||
case MaybeInSync:
|
||||
activationLock.unlock();
|
||||
logger.warnf("Cannot assume live role for NodeID = %s: claimed activation sequence need to be repaired", nodeId);
|
||||
TimeUnit.MILLISECONDS.sleep(CHECK_REPAIRED_ACTIVATION_SEQUENCE_WAIT_MILLIS);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private enum ValidationResult {
|
||||
/**
|
||||
* coordinated activation sequence (claimed/committed) is far beyond the local one: data is not valuable anymore
|
||||
**/
|
||||
Stale,
|
||||
/**
|
||||
* coordinated activation sequence is the same as local one: data is in sync
|
||||
**/
|
||||
InSync,
|
||||
/**
|
||||
* next coordinated activation sequence is not committed yet: maybe data is in sync
|
||||
**/
|
||||
MaybeInSync,
|
||||
/**
|
||||
* next coordinated activation sequence is not committed yet, but this broker can self-repair: data is in sync
|
||||
**/
|
||||
SelfRepair
|
||||
}
|
||||
|
||||
private static ValidationResult validateActivationSequence(final MutableLong coordinatedNodeSequence,
|
||||
final DistributedLock activationLock,
|
||||
final String lockAndLongId,
|
||||
final long nodeActivationSequence,
|
||||
final Logger logger) throws UnavailableStateException {
|
||||
assert coordinatedNodeSequence.getMutableLongId().equals(lockAndLongId);
|
||||
assert activationLock.getLockId().equals(lockAndLongId);
|
||||
final long currentCoordinatedNodeSequence = coordinatedNodeSequence.get();
|
||||
if (nodeActivationSequence == currentCoordinatedNodeSequence) {
|
||||
return ValidationResult.InSync;
|
||||
}
|
||||
if (currentCoordinatedNodeSequence > 0) {
|
||||
logger.infof("Not a candidate for NodeID = %s activation, local activation sequence %d does not match coordinated activation sequence %d",
|
||||
lockAndLongId, nodeActivationSequence, currentCoordinatedNodeSequence);
|
||||
return ValidationResult.Stale;
|
||||
}
|
||||
// claimed activation sequence
|
||||
final long claimedCoordinatedNodeSequence = -currentCoordinatedNodeSequence;
|
||||
final long sequenceGap = claimedCoordinatedNodeSequence - nodeActivationSequence;
|
||||
if (sequenceGap == 0) {
|
||||
return ValidationResult.SelfRepair;
|
||||
}
|
||||
if (sequenceGap == 1) {
|
||||
// maybe data is still valuable
|
||||
return ValidationResult.MaybeInSync;
|
||||
}
|
||||
assert sequenceGap > 1;
|
||||
// sequence is moved so much that data is no longer valuable
|
||||
logger.infof("Not a candidate for NodeID = %s activation, local activation sequence %d does not match coordinated activation sequence %d",
|
||||
lockAndLongId, nodeActivationSequence, claimedCoordinatedNodeSequence);
|
||||
return ValidationResult.Stale;
|
||||
}
|
||||
|
||||
/**
|
||||
* It wait until {@code timeoutMillis ms} has passed or the coordinated activation sequence has progressed enough
|
||||
*/
|
||||
public static boolean awaitNextCommittedActivationSequence(final DistributedPrimitiveManager distributedManager,
|
||||
final String coordinatedLockAndNodeId,
|
||||
final long activationSequence,
|
||||
final long timeoutMills,
|
||||
final Logger logger)
|
||||
throws ExecutionException, InterruptedException, TimeoutException, UnavailableStateException {
|
||||
Objects.requireNonNull(distributedManager);
|
||||
Objects.requireNonNull(logger);
|
||||
Objects.requireNonNull(coordinatedLockAndNodeId);
|
||||
if (activationSequence < 0) {
|
||||
throw new IllegalArgumentException("activationSequence must be >= 0, while is " + activationSequence);
|
||||
}
|
||||
if (!distributedManager.isStarted()) {
|
||||
throw new IllegalStateException("manager must be started");
|
||||
}
|
||||
final MutableLong coordinatedActivationSequence = distributedManager.getMutableLong(coordinatedLockAndNodeId);
|
||||
// wait for the live to activate and run un replicated with a sequence > inSyncReplicaActivation
|
||||
// this read can be dirty b/c we are just looking for an increment.
|
||||
boolean anyNext = false;
|
||||
final long timeoutNs = TimeUnit.MILLISECONDS.toNanos(timeoutMills);
|
||||
final long started = System.nanoTime();
|
||||
long elapsedNs;
|
||||
do {
|
||||
final long coordinatedValue = coordinatedActivationSequence.get();
|
||||
if (coordinatedValue > activationSequence) {
|
||||
// all good, some activation has gone ahead
|
||||
logger.infof("Detected a new activation sequence with NodeID = %s: and sequence: %d", coordinatedLockAndNodeId, coordinatedValue);
|
||||
anyNext = true;
|
||||
break;
|
||||
}
|
||||
if (coordinatedValue < 0) {
|
||||
// commit claim
|
||||
final long claimedSequence = -coordinatedValue;
|
||||
final long activationsGap = claimedSequence - activationSequence;
|
||||
if (activationsGap > 1) {
|
||||
// all good, some activation has gone ahead
|
||||
logger.infof("Detected furthers sequential server activations from sequence %d, with NodeID = %s: and claimed sequence: %d", activationSequence, coordinatedLockAndNodeId, claimedSequence);
|
||||
anyNext = true;
|
||||
break;
|
||||
}
|
||||
// activation is still in progress
|
||||
logger.debugf("Detected claiming of activation sequence = %d for NodeID = %s", claimedSequence, coordinatedLockAndNodeId);
|
||||
}
|
||||
try {
|
||||
TimeUnit.MILLISECONDS.sleep(CHECK_ACTIVATION_SEQUENCE_WAIT_MILLIS);
|
||||
} catch (InterruptedException ignored) {
|
||||
}
|
||||
elapsedNs = System.nanoTime() - started;
|
||||
}
|
||||
while (elapsedNs < timeoutNs);
|
||||
return anyNext;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is going to increment the coordinated activation sequence while holding the live lock, failing with some exception otherwise.<br>
|
||||
* <p>
|
||||
* The acceptable states are {@link ValidationResult#InSync} and {@link ValidationResult#SelfRepair}, throwing some exception otherwise.
|
||||
* <p>
|
||||
* This must be used while holding a live lock to ensure not-exclusive ownership of data ie can be both used
|
||||
* while loosing connectivity with a replica or after successfully {@link #tryActivate(String, long, DistributedPrimitiveManager, Logger)}.
|
||||
*/
|
||||
public static void ensureSequentialAccessToNodeData(ActiveMQServer activeMQServer,
|
||||
DistributedPrimitiveManager distributedPrimitiveManager,
|
||||
final Logger logger) throws ActiveMQException, InterruptedException, UnavailableStateException, ExecutionException, TimeoutException {
|
||||
|
||||
final NodeManager nodeManager = activeMQServer.getNodeManager();
|
||||
final String lockAndLongId = nodeManager.getNodeId().toString();
|
||||
final DistributedLock liveLock = distributedPrimitiveManager.getDistributedLock(lockAndLongId);
|
||||
if (!liveLock.isHeldByCaller()) {
|
||||
final String message = String.format("Server [%s], live lock for NodeID = %s, not held, activation sequence cannot be safely changed",
|
||||
activeMQServer, lockAndLongId);
|
||||
logger.info(message);
|
||||
throw new UnavailableStateException(message);
|
||||
}
|
||||
final long nodeActivationSequence = nodeManager.readNodeActivationSequence();
|
||||
final MutableLong coordinatedNodeActivationSequence = distributedPrimitiveManager.getMutableLong(lockAndLongId);
|
||||
final long currentCoordinatedActivationSequence = coordinatedNodeActivationSequence.get();
|
||||
final long nextActivationSequence;
|
||||
if (currentCoordinatedActivationSequence < 0) {
|
||||
// Check Self-Repair
|
||||
if (nodeActivationSequence != -currentCoordinatedActivationSequence) {
|
||||
final String message = String.format("Server [%s], cannot assume live role for NodeID = %s, local activation sequence %d does not match current claimed coordinated sequence %d: need repair",
|
||||
activeMQServer, lockAndLongId, nodeActivationSequence, -currentCoordinatedActivationSequence);
|
||||
logger.info(message);
|
||||
throw new ActiveMQException(message);
|
||||
}
|
||||
// auto-repair: this is the same server that failed to commit its claimed sequence
|
||||
nextActivationSequence = nodeActivationSequence;
|
||||
} else {
|
||||
// Check InSync
|
||||
if (nodeActivationSequence != currentCoordinatedActivationSequence) {
|
||||
final String message = String.format("Server [%s], cannot assume live role for NodeID = %s, local activation sequence %d does not match current coordinated sequence %d",
|
||||
activeMQServer, lockAndLongId, nodeActivationSequence, currentCoordinatedActivationSequence);
|
||||
logger.info(message);
|
||||
throw new ActiveMQException(message);
|
||||
}
|
||||
nextActivationSequence = nodeActivationSequence + 1;
|
||||
}
|
||||
// UN_REPLICATED STATE ENTER: auto-repair doesn't need to claim and write locally
|
||||
if (nodeActivationSequence != nextActivationSequence) {
|
||||
// claim
|
||||
if (!coordinatedNodeActivationSequence.compareAndSet(nodeActivationSequence, -nextActivationSequence)) {
|
||||
final String message = String.format("Server [%s], cannot assume live role for NodeID = %s, activation sequence claim failed, local activation sequence %d no longer matches current coordinated sequence %d",
|
||||
activeMQServer, lockAndLongId, nodeActivationSequence, coordinatedNodeActivationSequence.get());
|
||||
logger.infof(message);
|
||||
throw new ActiveMQException(message);
|
||||
}
|
||||
// claim success: write locally
|
||||
try {
|
||||
nodeManager.writeNodeActivationSequence(nextActivationSequence);
|
||||
} catch (NodeManager.NodeManagerException fatal) {
|
||||
logger.errorf("Server [%s] failed to set local activation sequence to: %d for NodeId =%s. Cannot continue committing coordinated activation sequence: REQUIRES ADMIN INTERVENTION",
|
||||
activeMQServer, nextActivationSequence, lockAndLongId);
|
||||
throw new UnavailableStateException(fatal);
|
||||
}
|
||||
logger.infof("Server [%s], incremented local activation sequence to: %d for NodeId = %s",
|
||||
activeMQServer, nextActivationSequence, lockAndLongId);
|
||||
} else {
|
||||
// self-heal need to update the in-memory sequence, because no writes will do it
|
||||
nodeManager.setNodeActivationSequence(nextActivationSequence);
|
||||
}
|
||||
// commit
|
||||
if (!coordinatedNodeActivationSequence.compareAndSet(-nextActivationSequence, nextActivationSequence)) {
|
||||
final String message = String.format("Server [%s], cannot assume live role for NodeID = %s, activation sequence commit failed, local activation sequence %d no longer matches current coordinated sequence %d",
|
||||
activeMQServer, lockAndLongId, nodeActivationSequence, coordinatedNodeActivationSequence.get());
|
||||
logger.infof(message);
|
||||
throw new ActiveMQException(message);
|
||||
}
|
||||
logger.infof("Server [%s], incremented coordinated activation sequence to: %d for NodeId = %s",
|
||||
activeMQServer, nextActivationSequence, lockAndLongId);
|
||||
}
|
||||
|
||||
}
|
|
@ -244,7 +244,9 @@ public class ManagementServiceImpl implements ManagementService {
|
|||
ObjectName objectName = objectNameBuilder.getActiveMQServerObjectName();
|
||||
unregisterFromJMX(objectName);
|
||||
unregisterFromRegistry(ResourceNames.BROKER);
|
||||
unregisterMeters(ResourceNames.BROKER + "." + messagingServer.getConfiguration().getName());
|
||||
if (messagingServer != null) {
|
||||
unregisterMeters(ResourceNames.BROKER + "." + messagingServer.getConfiguration().getName());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -2605,7 +2605,7 @@
|
|||
</xsd:annotation>
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="data-source-property" type="dataSourcePropertyType" minOccurs="1" maxOccurs="unbounded">
|
||||
<xsd:element name="data-source-property" type="propertyType" minOccurs="1" maxOccurs="unbounded">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
A key-value pair option for the DataSource
|
||||
|
@ -2682,7 +2682,7 @@
|
|||
<xsd:attributeGroup ref="xml:specialAttrs"/>
|
||||
</xsd:complexType>
|
||||
|
||||
<xsd:complexType name="dataSourcePropertyType">
|
||||
<xsd:complexType name="propertyType">
|
||||
<xsd:attribute name="key" type="xsd:string" use="required">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
|
@ -2726,6 +2726,36 @@
|
|||
<xsd:attributeGroup ref="xml:specialAttrs"/>
|
||||
</xsd:complexType>
|
||||
|
||||
<xsd:complexType name="distributed-primitive-manager">
|
||||
<xsd:all>
|
||||
<xsd:element name="class-name" type="xsd:string" minOccurs="0" maxOccurs="1">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
The distributed-primitive-manager class name
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="properties" minOccurs="0" maxOccurs="1">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
A list of options for the distributed-primitive-manager
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="property" type="propertyType" minOccurs="1" maxOccurs="unbounded">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
A key-value pair option for the distributed-primitive-manager
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
</xsd:sequence>
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
</xsd:all>
|
||||
</xsd:complexType>
|
||||
|
||||
<xsd:complexType name="haReplicationType">
|
||||
<xsd:choice>
|
||||
<xsd:element name="master" type="replicatedPolicyType" minOccurs="0" maxOccurs="1">
|
||||
|
@ -2749,6 +2779,20 @@
|
|||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="primary" type="asyncPrimaryPolicyType" minOccurs="0" maxOccurs="1">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
A primary server configured to replicate.
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="backup" type="asyncBackupPolicyType" minOccurs="0" maxOccurs="1">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
A backup server configured to replicate.
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
</xsd:choice>
|
||||
<xsd:attributeGroup ref="xml:specialAttrs"/>
|
||||
</xsd:complexType>
|
||||
|
@ -3119,6 +3163,125 @@
|
|||
</xsd:all>
|
||||
<xsd:attributeGroup ref="xml:specialAttrs"/>
|
||||
</xsd:complexType>
|
||||
<xsd:complexType name="asyncPrimaryPolicyType">
|
||||
<xsd:all>
|
||||
<xsd:element name="manager" type="distributed-primitive-manager" minOccurs="1" maxOccurs="1">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
It's the manager used to manage distributed locks used for this type of replication.
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="group-name" type="xsd:string" minOccurs="0" maxOccurs="1">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
used for replication, if set, (remote) backup servers will only pair with live servers with matching
|
||||
group-name
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="cluster-name" type="xsd:string" maxOccurs="1" minOccurs="0">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
Name of the cluster configuration to use for replication. This setting is only necessary in case you
|
||||
configure multiple cluster connections. It is used by replicating backups and by live servers that
|
||||
may attempt fail-back.
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="coordination-id" type="xsd:string" maxOccurs="1" minOccurs="0">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
The common identity to use for coordination that is shared across instances that will replicate.
|
||||
The value will be used as the internal server nodeId and as the identity of entities in the
|
||||
distributed-primitive-manager.
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="initial-replication-sync-timeout" type="xsd:long" default="30000" maxOccurs="1"
|
||||
minOccurs="0">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
The amount of time to wait for the replica to acknowledge it has received all the necessary data from
|
||||
the replicating server at the final step of the initial replication synchronization process.
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="retry-replication-wait" type="xsd:long" default="2000" minOccurs="0" maxOccurs="1">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
If we start as a replica how long to wait (in milliseconds) before trying to replicate again after failing to find a replica
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
</xsd:all>
|
||||
<xsd:attributeGroup ref="xml:specialAttrs"/>
|
||||
</xsd:complexType>
|
||||
<xsd:complexType name="asyncBackupPolicyType">
|
||||
<xsd:all>
|
||||
<xsd:element name="manager" type="distributed-primitive-manager" minOccurs="1" maxOccurs="1">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
It's the manager used to manage distributed locks used for this type of replication.
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="group-name" type="xsd:string" minOccurs="0" maxOccurs="1">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
used for replication, if set, (remote) backup servers will only pair with live servers with matching
|
||||
group-name
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="cluster-name" type="xsd:string" maxOccurs="1" minOccurs="0">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
Name of the cluster configuration to use for replication. This setting is only necessary in case you
|
||||
configure multiple cluster connections. It is used by replicating backups and by live servers that
|
||||
may attempt fail-back.
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="max-saved-replicated-journals-size" type="xsd:int" default="2" maxOccurs="1" minOccurs="0">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
This specifies how many times a replicated backup server can restart after moving its files on start.
|
||||
Once there are this number of backup journal files the server will stop permanently after it fails
|
||||
back.
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="allow-failback" type="xsd:boolean" default="true" maxOccurs="1" minOccurs="0">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
Whether a server will automatically stop when another server places a request to take over
|
||||
its place. The use case is when a regular server stops and its backup takes over its
|
||||
duties, later the main server restarts and requests the server (the former backup) to
|
||||
stop operating.
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="initial-replication-sync-timeout" type="xsd:long" default="30000" maxOccurs="1"
|
||||
minOccurs="0">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
If we have to start as a replicated server this is the amount of time to wait for the replica to
|
||||
acknowledge it has received all the necessary data from the replicating server at the final step
|
||||
of the initial replication synchronization process.
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
<xsd:element name="retry-replication-wait" type="xsd:long" default="2000" minOccurs="0" maxOccurs="1">
|
||||
<xsd:annotation>
|
||||
<xsd:documentation>
|
||||
How long to wait (in milliseconds) before trying to replicate again after failing to find a replica
|
||||
</xsd:documentation>
|
||||
</xsd:annotation>
|
||||
</xsd:element>
|
||||
</xsd:all>
|
||||
<xsd:attributeGroup ref="xml:specialAttrs"/>
|
||||
</xsd:complexType>
|
||||
<xsd:complexType name="colocatedReplicaPolicyType">
|
||||
<xsd:all>
|
||||
<xsd:element name="group-name" type="xsd:string" minOccurs="0" maxOccurs="1">
|
||||
|
|
|
@ -17,7 +17,12 @@
|
|||
package org.apache.activemq.artemis.core.config.impl;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
|
||||
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.Configuration;
|
||||
import org.apache.activemq.artemis.core.config.FileDeploymentManager;
|
||||
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
|
||||
|
@ -27,6 +32,8 @@ import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy;
|
|||
import org.apache.activemq.artemis.core.server.cluster.ha.LiveOnlyPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicaPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicatedPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ScaleDownPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreMasterPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreSlavePolicy;
|
||||
|
@ -35,11 +42,19 @@ import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
|
|||
import org.apache.activemq.artemis.core.server.impl.ColocatedActivation;
|
||||
import org.apache.activemq.artemis.core.server.impl.FileLockNodeManager;
|
||||
import org.apache.activemq.artemis.core.server.impl.LiveOnlyActivation;
|
||||
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
|
||||
import org.apache.activemq.artemis.core.server.impl.ReplicationPrimaryActivation;
|
||||
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
|
||||
import org.apache.activemq.artemis.core.server.impl.SharedNothingLiveActivation;
|
||||
import org.apache.activemq.artemis.core.server.impl.SharedStoreBackupActivation;
|
||||
import org.apache.activemq.artemis.core.server.impl.SharedStoreLiveActivation;
|
||||
import org.apache.activemq.artemis.quorum.DistributedLock;
|
||||
import org.apache.activemq.artemis.quorum.DistributedPrimitiveManager;
|
||||
import org.apache.activemq.artemis.quorum.MutableLong;
|
||||
import org.apache.activemq.artemis.quorum.UnavailableStateException;
|
||||
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.hamcrest.core.IsInstanceOf;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.instanceOf;
|
||||
|
@ -124,6 +139,242 @@ public class HAPolicyConfigurationTest extends ActiveMQTestBase {
|
|||
liveOnlyTest("live-only-hapolicy-config5.xml");
|
||||
}
|
||||
|
||||
/**
 * In-memory {@link DistributedPrimitiveManager} used by the HA policy configuration tests.
 * <p>
 * It keeps the configuration map it was constructed with so tests can read it back through
 * {@link #getConfig()}, tracks a simple started flag, and hands out at most one
 * {@link DistributedLock} whose state is a local boolean. Listener registration is a no-op
 * and {@link #getMutableLong(String)} is not implemented.
 */
public static class FakeDistributedPrimitiveManager implements DistributedPrimitiveManager {
|
||||
|
||||
private final Map<String, String> config;
|
||||
private boolean started;
|
||||
private DistributedLock lock;
|
||||
|
||||
public FakeDistributedPrimitiveManager(Map<String, String> config) {
|
||||
this.config = config;
|
||||
this.started = false;
|
||||
}
|
||||
|
||||
public Map<String, String> getConfig() {
|
||||
return config;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addUnavailableManagerListener(UnavailableManagerListener listener) {
|
||||
// no op
|
||||
}
|
||||
|
||||
@Override
|
||||
public void removeUnavailableManagerListener(UnavailableManagerListener listener) {
|
||||
// no op
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean start(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException {
|
||||
started = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start() throws InterruptedException, ExecutionException {
|
||||
started = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isStarted() {
|
||||
return started;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stop() {
|
||||
started = false;
|
||||
if (lock != null) {
|
||||
lock.close();
|
||||
}
|
||||
lock = null;
|
||||
}
|
||||
|
||||
/**
 * Returns the single lock of this manager, creating it on first use.
 *
 * @throws IllegalStateException if the manager has not been started, or if a lock already
 *                               exists and was created for a different {@code lockId}
 */
@Override
|
||||
public DistributedLock getDistributedLock(String lockId) {
|
||||
if (!started) {
|
||||
throw new IllegalStateException("need to start first");
|
||||
}
|
||||
if (lock == null) {
|
||||
lock = new DistributedLock() {
|
||||
|
||||
private boolean held;
|
||||
|
||||
@Override
|
||||
public String getLockId() {
|
||||
return lockId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isHeldByCaller() throws UnavailableStateException {
|
||||
return held;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean tryLock() throws UnavailableStateException, InterruptedException {
|
||||
// not re-entrant: a second tryLock while held reports failure
if (held) {
|
||||
return false;
|
||||
}
|
||||
held = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void unlock() throws UnavailableStateException {
|
||||
held = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addListener(UnavailableLockListener listener) {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void removeListener(UnavailableLockListener listener) {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
held = false;
|
||||
}
|
||||
};
|
||||
} else if (!lock.getLockId().equals(lockId)) {
|
||||
throw new IllegalStateException("This shouldn't happen");
|
||||
}
|
||||
return lock;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MutableLong getMutableLong(String mutableLongId) throws InterruptedException, ExecutionException, TimeoutException {
|
||||
// TODO: not implemented in this fake; callers always get null
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
stop();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Asserts that the given manager configuration contains exactly the seven key/value pairs
 * checked below. Values are compared as strings.
 *
 * @param config the configuration map read back from the manager under test
 */
private static void validateManagerConfig(Map<String, String> config) {
|
||||
assertEquals("127.0.0.1:6666", config.get("connect-string"));
|
||||
assertEquals("16000", config.get("session-ms"));
|
||||
assertEquals("2000", config.get("connection-ms"));
|
||||
assertEquals("2", config.get("retries"));
|
||||
assertEquals("2000", config.get("retries-ms"));
|
||||
assertEquals("test", config.get("namespace"));
|
||||
assertEquals("10", config.get("session-percent"));
|
||||
// size check rejects any entry beyond the seven asserted above
assertEquals(7, config.size());
|
||||
}
|
||||
|
||||
/**
 * Starts a server from {@code primary-hapolicy-config.xml} and checks that it ends up with a
 * {@link ReplicationPrimaryActivation} and a {@link ReplicationPrimaryPolicy} carrying the
 * expected values, that the companion backup (failback) policy mirrors it, and that the
 * distributed manager is a {@link FakeDistributedPrimitiveManager} holding the expected
 * configuration. The server is always stopped in the {@code finally} block.
 */
@Test
|
||||
public void PrimaryReplicationTest() throws Exception {
|
||||
Configuration configuration = createConfiguration("primary-hapolicy-config.xml");
|
||||
ActiveMQServerImpl server = new ActiveMQServerImpl(configuration);
|
||||
try {
|
||||
server.start();
|
||||
Activation activation = server.getActivation();
|
||||
assertTrue(activation instanceof ReplicationPrimaryActivation);
|
||||
HAPolicy haPolicy = server.getHAPolicy();
|
||||
assertTrue(haPolicy instanceof ReplicationPrimaryPolicy);
|
||||
ReplicationPrimaryPolicy policy = (ReplicationPrimaryPolicy) haPolicy;
|
||||
assertFalse(policy.isAllowAutoFailBack());
|
||||
assertEquals(9876, policy.getInitialReplicationSyncTimeout());
|
||||
assertFalse(policy.canScaleDown());
|
||||
assertFalse(policy.isBackup());
|
||||
assertFalse(policy.isSharedStore());
|
||||
assertTrue(policy.isWaitForActivation());
|
||||
assertEquals("purple", policy.getGroupName());
|
||||
assertEquals("purple", policy.getBackupGroupName());
|
||||
assertEquals("abcdefg", policy.getClusterName());
|
||||
assertFalse(policy.useQuorumManager());
|
||||
// check failback companion backup policy
|
||||
ReplicationBackupPolicy failbackPolicy = policy.getBackupPolicy();
|
||||
assertNotNull(failbackPolicy);
|
||||
assertSame(policy, failbackPolicy.getLivePolicy());
|
||||
assertEquals(policy.getGroupName(), failbackPolicy.getGroupName());
|
||||
assertEquals(policy.getBackupGroupName(), failbackPolicy.getBackupGroupName());
|
||||
assertEquals(policy.getClusterName(), failbackPolicy.getClusterName());
|
||||
// NOTE(review): arguments here are in (actual, expected) order, unlike the literal-first calls above
assertEquals(failbackPolicy.getMaxSavedReplicatedJournalsSize(), ActiveMQDefaultConfiguration.getDefaultMaxSavedReplicatedJournalsSize());
|
||||
assertTrue(failbackPolicy.isTryFailback());
|
||||
assertTrue(failbackPolicy.isBackup());
|
||||
assertFalse(failbackPolicy.isSharedStore());
|
||||
assertTrue(failbackPolicy.isWaitForActivation());
|
||||
assertFalse(failbackPolicy.useQuorumManager());
|
||||
assertEquals(12345, failbackPolicy.getRetryReplicationWait());
|
||||
// check scale-down properties
|
||||
assertFalse(failbackPolicy.canScaleDown());
|
||||
assertNull(failbackPolicy.getScaleDownClustername());
|
||||
assertNull(failbackPolicy.getScaleDownGroupName());
|
||||
// validate manager
|
||||
DistributedPrimitiveManager manager = ((ReplicationPrimaryActivation) activation).getDistributedManager();
|
||||
assertNotNull(manager);
|
||||
assertEquals(FakeDistributedPrimitiveManager.class.getName(), manager.getClass().getName());
|
||||
MatcherAssert.assertThat(manager, IsInstanceOf.instanceOf(FakeDistributedPrimitiveManager.class));
|
||||
FakeDistributedPrimitiveManager forwardingManager = (FakeDistributedPrimitiveManager) manager;
|
||||
// validate manager config
|
||||
validateManagerConfig(forwardingManager.getConfig());
|
||||
} finally {
|
||||
server.stop();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Starts a server from {@code backup-hapolicy-config.xml} and checks that it ends up with a
 * {@link ReplicationBackupActivation} and a {@link ReplicationBackupPolicy} carrying the
 * expected values, that the companion live (failover) policy mirrors it, and that the
 * distributed manager is a {@link FakeDistributedPrimitiveManager} holding the expected
 * configuration. The server is always stopped in the {@code finally} block.
 */
@Test
|
||||
public void BackupReplicationTest() throws Exception {
|
||||
Configuration configuration = createConfiguration("backup-hapolicy-config.xml");
|
||||
ActiveMQServerImpl server = new ActiveMQServerImpl(configuration);
|
||||
try {
|
||||
server.start();
|
||||
Activation activation = server.getActivation();
|
||||
assertTrue(activation instanceof ReplicationBackupActivation);
|
||||
HAPolicy haPolicy = server.getHAPolicy();
|
||||
assertTrue(haPolicy instanceof ReplicationBackupPolicy);
|
||||
ReplicationBackupPolicy policy = (ReplicationBackupPolicy) haPolicy;
|
||||
assertEquals("tiddles", policy.getGroupName());
|
||||
assertEquals("tiddles", policy.getBackupGroupName());
|
||||
assertEquals("33rrrrr", policy.getClusterName());
|
||||
assertEquals(22, policy.getMaxSavedReplicatedJournalsSize());
|
||||
assertFalse(policy.isTryFailback());
|
||||
assertTrue(policy.isBackup());
|
||||
assertFalse(policy.isSharedStore());
|
||||
assertTrue(policy.isWaitForActivation());
|
||||
assertFalse(policy.useQuorumManager());
|
||||
assertEquals(12345, policy.getRetryReplicationWait());
|
||||
// check scale-down properties
|
||||
assertFalse(policy.canScaleDown());
|
||||
assertNull(policy.getScaleDownClustername());
|
||||
assertNull(policy.getScaleDownGroupName());
|
||||
// check failover companion live policy
|
||||
ReplicationPrimaryPolicy failoverLivePolicy = policy.getLivePolicy();
|
||||
assertNotNull(failoverLivePolicy);
|
||||
assertSame(policy, failoverLivePolicy.getBackupPolicy());
|
||||
assertFalse(failoverLivePolicy.isAllowAutoFailBack());
|
||||
assertEquals(9876, failoverLivePolicy.getInitialReplicationSyncTimeout());
|
||||
assertFalse(failoverLivePolicy.canScaleDown());
|
||||
assertFalse(failoverLivePolicy.isBackup());
|
||||
assertFalse(failoverLivePolicy.isSharedStore());
|
||||
assertTrue(failoverLivePolicy.isWaitForActivation());
|
||||
assertEquals(policy.getGroupName(), failoverLivePolicy.getGroupName());
|
||||
assertEquals(policy.getClusterName(), failoverLivePolicy.getClusterName());
|
||||
assertEquals(policy.getBackupGroupName(), failoverLivePolicy.getBackupGroupName());
|
||||
assertFalse(failoverLivePolicy.useQuorumManager());
|
||||
// check scale-down properties (canScaleDown() is asserted a second time here for the live policy)
|
||||
assertFalse(failoverLivePolicy.canScaleDown());
|
||||
assertNull(failoverLivePolicy.getScaleDownClustername());
|
||||
assertNull(failoverLivePolicy.getScaleDownGroupName());
|
||||
// validate manager
|
||||
DistributedPrimitiveManager manager = ((ReplicationBackupActivation) activation).getDistributedManager();
|
||||
assertNotNull(manager);
|
||||
assertEquals(FakeDistributedPrimitiveManager.class.getName(), manager.getClass().getName());
|
||||
MatcherAssert.assertThat(manager, IsInstanceOf.instanceOf(FakeDistributedPrimitiveManager.class));
|
||||
FakeDistributedPrimitiveManager forwardingManager = (FakeDistributedPrimitiveManager) manager;
|
||||
// validate manager config
|
||||
validateManagerConfig(forwardingManager.getConfig());
|
||||
} finally {
|
||||
server.stop();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void ReplicatedTest() throws Exception {
|
||||
Configuration configuration = createConfiguration("replicated-hapolicy-config.xml");
|
||||
|
|
|
@ -113,6 +113,7 @@ import org.apache.activemq.artemis.core.remoting.impl.invm.TransportConstants;
|
|||
import org.apache.activemq.artemis.core.remoting.impl.netty.NettyAcceptorFactory;
|
||||
import org.apache.activemq.artemis.core.remoting.impl.netty.NettyConnector;
|
||||
import org.apache.activemq.artemis.core.remoting.impl.netty.NettyConnectorFactory;
|
||||
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQComponent;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServer;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
|
||||
|
@ -129,6 +130,7 @@ import org.apache.activemq.artemis.core.server.impl.Activation;
|
|||
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
|
||||
import org.apache.activemq.artemis.core.server.impl.AddressInfo;
|
||||
import org.apache.activemq.artemis.core.server.impl.LiveOnlyActivation;
|
||||
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
|
||||
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
|
||||
import org.apache.activemq.artemis.core.settings.impl.AddressFullMessagePolicy;
|
||||
import org.apache.activemq.artemis.core.settings.impl.AddressSettings;
|
||||
|
@ -1384,6 +1386,8 @@ public abstract class ActiveMQTestBase extends Assert {
|
|||
if (isReplicated) {
|
||||
if (activation instanceof SharedNothingBackupActivation) {
|
||||
isRemoteUpToDate = backup.isReplicaSync();
|
||||
} else if (activation instanceof ReplicationBackupActivation) {
|
||||
isRemoteUpToDate = backup.isReplicaSync();
|
||||
} else {
|
||||
//we may have already failed over and changed the Activation
|
||||
if (actualServer.isStarted()) {
|
||||
|
@ -2517,6 +2521,17 @@ public abstract class ActiveMQTestBase extends Assert {
|
|||
return !hadToInterrupt;
|
||||
}
|
||||
|
||||
/**
 * Returns the replication endpoint exposed by the server's current activation.
 *
 * @param server the server whose activation is inspected
 * @return the endpoint when the activation is a {@link SharedNothingBackupActivation} or a
 *         {@link ReplicationBackupActivation}; {@code null} for any other activation type
 */
protected static ReplicationEndpoint getReplicationEndpoint(ActiveMQServer server) {
|
||||
final Activation activation = server.getActivation();
|
||||
if (activation instanceof SharedNothingBackupActivation) {
|
||||
return ((SharedNothingBackupActivation) activation).getReplicationEndpoint();
|
||||
}
|
||||
if (activation instanceof ReplicationBackupActivation) {
|
||||
return ((ReplicationBackupActivation) activation).getReplicationEndpoint();
|
||||
}
|
||||
// any other activation type has no endpoint accessor used here
return null;
|
||||
}
|
||||
|
||||
// Private -------------------------------------------------------
|
||||
|
||||
// Inner classes -------------------------------------------------
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<configuration
|
||||
xmlns="urn:activemq"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="urn:activemq /schema/artemis-server.xsd">
|
||||
<core xmlns="urn:activemq:core">
|
||||
<discovery-groups>
|
||||
<discovery-group name="wahey"/>
|
||||
</discovery-groups>
|
||||
<ha-policy>
|
||||
<replication>
|
||||
<backup>
|
||||
<group-name>tiddles</group-name>
|
||||
<max-saved-replicated-journals-size>22</max-saved-replicated-journals-size>
|
||||
<cluster-name>33rrrrr</cluster-name>
|
||||
<initial-replication-sync-timeout>9876</initial-replication-sync-timeout>
|
||||
<retry-replication-wait>12345</retry-replication-wait>
|
||||
<allow-failback>false</allow-failback>
|
||||
<manager>
|
||||
<class-name>
|
||||
org.apache.activemq.artemis.core.config.impl.HAPolicyConfigurationTest$FakeDistributedPrimitiveManager
|
||||
</class-name>
|
||||
<properties>
|
||||
<property key="connect-string" value="127.0.0.1:6666"/>
|
||||
<property key="session-ms" value="16000"/>
|
||||
<property key="connection-ms" value="2000"/>
|
||||
<property key="retries" value="2"/>
|
||||
<property key="retries-ms" value="2000"/>
|
||||
<property key="namespace" value="test"/>
|
||||
<property key="session-percent" value="10"/>
|
||||
</properties>
|
||||
</manager>
|
||||
</backup>
|
||||
</replication>
|
||||
</ha-policy>
|
||||
</core>
|
||||
</configuration>
|
|
@ -0,0 +1,49 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<configuration
|
||||
xmlns="urn:activemq"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="urn:activemq /schema/artemis-server.xsd">
|
||||
|
||||
<core xmlns="urn:activemq:core">
|
||||
<ha-policy>
|
||||
<replication>
|
||||
<primary>
|
||||
<group-name>purple</group-name>
|
||||
<cluster-name>abcdefg</cluster-name>
|
||||
<initial-replication-sync-timeout>9876</initial-replication-sync-timeout>
|
||||
<retry-replication-wait>12345</retry-replication-wait>
|
||||
<manager>
|
||||
<class-name>
|
||||
org.apache.activemq.artemis.core.config.impl.HAPolicyConfigurationTest$FakeDistributedPrimitiveManager
|
||||
</class-name>
|
||||
<properties>
|
||||
<property key="connect-string" value="127.0.0.1:6666"/>
|
||||
<property key="session-ms" value="16000"/>
|
||||
<property key="connection-ms" value="2000"/>
|
||||
<property key="retries" value="2"/>
|
||||
<property key="retries-ms" value="2000"/>
|
||||
<property key="namespace" value="test"/>
|
||||
<property key="session-percent" value="10"/>
|
||||
</properties>
|
||||
</manager>
|
||||
</primary>
|
||||
</replication>
|
||||
</ha-policy>
|
||||
</core>
|
||||
|
||||
</configuration>
|
|
@ -65,8 +65,8 @@ which we will cover in a later chapter.
|
|||
> message data will not be available after failover.
|
||||
|
||||
The `ha-policy` type configures which strategy a cluster should use to
|
||||
provide the backing up of a servers data. Within this configuration
|
||||
element is configured how a server should behave within the cluster,
|
||||
provide the backing up of a server's data. Within this configuration
|
||||
element we configure how a server should behave within the cluster,
|
||||
either as a master (live), slave (backup) or colocated (both live and
|
||||
backup). This would look something like:
|
||||
|
||||
|
@ -98,6 +98,33 @@ or
|
|||
</ha-policy>
|
||||
```
|
||||
|
||||
*Replication* allows the configuration of two new roles to enable *pluggable quorum* provider configuration, by using:
|
||||
```xml
|
||||
<ha-policy>
|
||||
<replication>
|
||||
<primary/>
|
||||
</replication>
|
||||
</ha-policy>
|
||||
```
|
||||
to configure the classic *master* role, and
|
||||
```xml
|
||||
<ha-policy>
|
||||
<replication>
|
||||
<backup/>
|
||||
</replication>
|
||||
</ha-policy>
|
||||
```
|
||||
for the classic *slave* one.
|
||||
|
||||
If *replication* is configured using these new roles, some additional elements are required to complete the configuration, as detailed later.
|
||||
|
||||
### IMPORTANT NOTE ON PLUGGABLE QUORUM VOTE FEATURE
|
||||
|
||||
This feature is still **EXPERIMENTAL**. Extra testing should be done before running this feature in production. Please report any issues you find to the ActiveMQ Artemis mailing lists.
|
||||
|
||||
It means:
|
||||
- its configuration can change until declared as **officially stable**
|
||||
|
||||
### Data Replication
|
||||
|
||||
When using replication, the live and the backup servers do not share the
|
||||
|
@ -196,19 +223,29 @@ changes and repeats the process.
|
|||
> live server by changing `slave` to `master`.
|
||||
|
||||
Much like in the shared-store case, when the live server stops or
|
||||
crashes, its replicating backup will become active and take over its
|
||||
crashes, its replicating backup will become active and take over its
|
||||
duties. Specifically, the backup will become active when it loses
|
||||
connection to its live server. This can be problematic because this can
|
||||
also happen because of a temporary network problem. In order to address
|
||||
this issue, the backup will try to determine whether it still can
|
||||
connection to its live server. This can be problematic because it can
|
||||
also happen as the result of a temporary network problem.
|
||||
|
||||
The issue can be solved in two different ways, depending on which replication roles are configured:
|
||||
- **classic replication** (`master`/`slave` roles): backup will try to determine whether it still can
|
||||
connect to the other servers in the cluster. If it can connect to more
|
||||
than half the servers, it will become active, if more than half the
|
||||
servers also disappeared with the live, the backup will wait and try
|
||||
reconnecting with the live. This avoids a split brain situation.
|
||||
- **pluggable quorum vote replication** (`primary`/`backup` roles): backup relies on a pluggable quorum provider
|
||||
(configurable via `manager` xml element) to detect if there's any active live.
|
||||
|
||||
> ***NOTE***
|
||||
>
|
||||
> A backup in the **pluggable quorum vote replication** still needs to carefully configure
|
||||
> [connection-ttl](connection-ttl.md) in order to promptly issue a request to become live to the quorum service
|
||||
> before failing-over.
|
||||
|
||||
#### Configuration
|
||||
|
||||
To configure the live and backup servers to be a replicating pair,
|
||||
To configure a classic replication's live and backup servers to be a replicating pair,
|
||||
configure the live server in `broker.xml` to have:
|
||||
|
||||
```xml
|
||||
|
@ -235,6 +272,30 @@ The backup server must be similarly configured but as a `slave`
|
|||
</ha-policy>
|
||||
```
|
||||
|
||||
To configure a pluggable quorum replication's primary and backup use:
|
||||
|
||||
```xml
|
||||
<ha-policy>
|
||||
<replication>
|
||||
<primary/>
|
||||
</replication>
|
||||
</ha-policy>
|
||||
...
|
||||
<cluster-connections>
|
||||
<cluster-connection name="my-cluster">
|
||||
...
|
||||
</cluster-connection>
|
||||
</cluster-connections>
|
||||
```
|
||||
and
|
||||
```xml
|
||||
<ha-policy>
|
||||
<replication>
|
||||
<backup/>
|
||||
</replication>
|
||||
</ha-policy>
|
||||
```
|
||||
|
||||
#### All Replication Configuration
|
||||
|
||||
The following table lists all the `ha-policy` configuration elements for
|
||||
|
@ -308,6 +369,130 @@ replica to acknowledge it has received all the necessary data. The
|
|||
default is 30,000 milliseconds. **Note:** during this interval any
|
||||
journal related operations will be blocked.
|
||||
|
||||
#### Pluggable Quorum Vote Replication configurations
|
||||
Pluggable Quorum Vote replication configuration options are a bit different
|
||||
from classic replication, mostly because of its customizable nature.
|
||||
|
||||
[Apache curator](https://curator.apache.org/) is used by the default quorum provider.
|
||||
|
||||
Below are some example configurations to show how it works.
|
||||
|
||||
For `primary`:
|
||||
```xml
|
||||
<ha-policy>
|
||||
<replication>
|
||||
<primary>
|
||||
<manager>
|
||||
<class-name>org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager</class-name>
|
||||
<properties>
|
||||
<property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
|
||||
</properties>
|
||||
</manager>
|
||||
</primary>
|
||||
</replication>
|
||||
</ha-policy>
|
||||
```
|
||||
And `backup`:
|
||||
```xml
|
||||
<ha-policy>
|
||||
<replication>
|
||||
<backup>
|
||||
<manager>
|
||||
<class-name>org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager</class-name>
|
||||
<properties>
|
||||
<property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
|
||||
</properties>
|
||||
</manager>
|
||||
<allow-failback>true</allow-failback>
|
||||
</backup>
|
||||
</replication>
|
||||
</ha-policy>
|
||||
```
|
||||
The configuration of `class-name` as follows
|
||||
```xml
|
||||
<class-name>org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager</class-name>
|
||||
```
|
||||
isn't really needed, because Apache Curator is the default provider, but has been shown for completeness.
|
||||
|
||||
The `properties` element:
|
||||
```xml
|
||||
<properties>
|
||||
<property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
|
||||
</properties>
|
||||
```
|
||||
can specify a list of `property` elements in the form of key-value pairs, appropriate to what is
|
||||
supported by the specified `class-name` provider.
|
||||
|
||||
Apache Curator's provider allows the following properties:
|
||||
|
||||
- [`connect-string`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#connectString(java.lang.String)): (no default)
|
||||
- [`session-ms`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#sessionTimeoutMs(int)): (default is 18000 ms)
|
||||
- [`session-percent`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#simulatedSessionExpirationPercent(int)): (default is 33); should be <= default,
|
||||
see https://cwiki.apache.org/confluence/display/CURATOR/TN14 for more info
|
||||
- [`connection-ms`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#connectionTimeoutMs(int)): (default is 8000 ms)
|
||||
- [`retries`](https://curator.apache.org/apidocs/org/apache/curator/retry/RetryNTimes.html#%3Cinit%3E(int,int)): (default is 1)
|
||||
- [`retries-ms`](https://curator.apache.org/apidocs/org/apache/curator/retry/RetryNTimes.html#%3Cinit%3E(int,int)): (default is 1000 ms)
|
||||
- [`namespace`](https://curator.apache.org/apidocs/org/apache/curator/framework/CuratorFrameworkFactory.Builder.html#namespace(java.lang.String)): (no default)
|
||||
|
||||
Configuration of the [Apache Zookeeper](https://zookeeper.apache.org/) ensemble is the responsibility of the user, but there are a few
|
||||
**suggestions to improve the reliability of the quorum service**:
|
||||
- broker `session-ms` must be `>= 2 * server tick time` and `<= 20 * server tick time` as per
|
||||
[Zookeeper 3.6.3 admin guide](https://zookeeper.apache.org/doc/r3.6.3/zookeeperAdmin.html): it directly impacts how fast a backup
|
||||
can failover to an isolated/killed/unresponsive live; the higher, the slower.
|
||||
- GC on broker machine should allow keeping GC pauses within 1/3 of `session-ms` in order to let the Zookeeper heartbeat protocol
|
||||
work reliably. If that is not possible, it is better to increase `session-ms`, accepting a slower failover.
|
||||
- Zookeeper must have enough resources to keep GC (and OS) pauses much smaller than server tick time: please consider carefully if
|
||||
broker and Zookeeper node should share the same physical machine, depending on the expected load of the broker
|
||||
- network isolation protection requires configuring >=3 Zookeeper nodes
|
||||
|
||||
#### *Important*: Notes on pluggable quorum replication configuration
|
||||
|
||||
There are some no longer needed `classic` replication configurations:
|
||||
- `vote-on-replication-failure`
|
||||
- `quorum-vote-wait`
|
||||
- `vote-retries`
|
||||
- `vote-retries-wait`
|
||||
- `check-for-live-server`
|
||||
|
||||
**Notes on replication configuration with [Apache curator](https://curator.apache.org/) quorum provider**
|
||||
|
||||
As said some paragraphs above, `session-ms` affects the failover duration: a backup can
|
||||
failover after `session-ms` expires or, if the live broker voluntarily gives up its role
|
||||
(e.g. during a fail-back/manual broker stop), it happens immediately.
|
||||
|
||||
For the former case (session expiration with live no longer present), the backup broker can detect an unresponsive live by using:
|
||||
1. cluster connection PINGs (affected by [connection-ttl](connection-ttl.md) tuning)
|
||||
2. closed TCP connection notification (depends on TCP configuration and networking stack/topology)
|
||||
|
||||
The suggestion is to tune `connection-ttl` low enough to attempt failover as soon as possible, while taking into consideration that
|
||||
the whole fail-over duration cannot last less than the configured `session-ms`.
|
||||
|
||||
##### Peer or Multi Primary
|
||||
With coordination delegated to the quorum service, roles are less important. It is possible to have two peer servers compete
|
||||
for activation; the winner activating as live, the loser taking up a backup role. On restart, 'any' peer server
|
||||
with the most up to date journal can activate.
|
||||
The instances need to know in advance what identity they will coordinate on.
|
||||
In the replication 'primary' ha policy we can explicitly set the 'coordination-id' to a common value for all peers in a cluster.
|
||||
|
||||
For `multi primary`:
|
||||
```xml
|
||||
<ha-policy>
|
||||
<replication>
|
||||
<primary>
|
||||
<manager>
|
||||
<class-name>org.apache.activemq.artemis.quorum.zookeeper.CuratorDistributedPrimitiveManager</class-name>
|
||||
<properties>
|
||||
<property key="connect-string" value="127.0.0.1:6666,127.0.0.1:6667,127.0.0.1:6668"/>
|
||||
</properties>
|
||||
</manager>
|
||||
<coordination-id>peer-journal-001</coordination-id>
|
||||
</primary>
|
||||
</replication>
|
||||
</ha-policy>
|
||||
```
|
||||
Note: the string value provided will be converted internally into a 16 byte UUID, so it may not be immediately recognisable or human-readable,
|
||||
however it will ensure that all 'peers' coordinate.
|
||||
|
||||
### Shared Store
|
||||
|
||||
When using a shared store, both live and backup servers share the *same*
|
||||
|
@ -406,19 +591,43 @@ stop. This configuration would look like:
|
|||
</ha-policy>
|
||||
```
|
||||
|
||||
In replication HA mode you need to set an extra property
|
||||
`check-for-live-server` to `true` in the `master` configuration. If set
|
||||
The same configuration option can be set for both replications, classic:
|
||||
```xml
|
||||
<ha-policy>
|
||||
<replication>
|
||||
<slave>
|
||||
<allow-failback>true</allow-failback>
|
||||
</slave>
|
||||
</replication>
|
||||
</ha-policy>
|
||||
```
|
||||
and with pluggable quorum provider:
|
||||
```xml
|
||||
<ha-policy>
|
||||
<replication>
|
||||
<manager>
|
||||
<!-- some meaningful configuration -->
|
||||
</manager>
|
||||
<backup>
|
||||
<allow-failback>true</allow-failback>
|
||||
</backup>
|
||||
</replication>
|
||||
</ha-policy>
|
||||
```
|
||||
|
||||
In both replication HA modes you need to set an extra property
|
||||
`check-for-live-server` to `true` in the `master`/`primary` configuration. If set
|
||||
to true, during start-up a live server will first search the cluster for
|
||||
another server using its nodeID. If it finds one, it will contact this
|
||||
server and try to "fail-back". Since this is a remote replication
|
||||
scenario, the "starting live" will have to synchronize its data with the
|
||||
server running with its ID, once they are in sync, it will request the
|
||||
other server (which it assumes it is a back that has assumed its duties)
|
||||
to shutdown for it to take over. This is necessary because otherwise the
|
||||
other server (which it assumes it is a backup that has assumed its duties)
|
||||
to shutdown, for it to take over. This is necessary because otherwise the
|
||||
live server has no means to know whether there was a fail-over or not,
|
||||
and if there was if the server that took its duties is still running or
|
||||
and if there was, if the server that took its duties is still running or
|
||||
not. To configure this option at your `broker.xml`
|
||||
configuration file as follows:
|
||||
configuration file as follows, for classic replication:
|
||||
|
||||
```xml
|
||||
<ha-policy>
|
||||
|
@ -430,7 +639,30 @@ configuration file as follows:
|
|||
</ha-policy>
|
||||
```
|
||||
|
||||
> **Warning**
|
||||
And pluggable quorum replication:
|
||||
|
||||
```xml
|
||||
<ha-policy>
|
||||
<replication>
|
||||
<manager>
|
||||
<!-- some meaningful configuration -->
|
||||
</manager>
|
||||
<primary>
|
||||
<!-- no need to check-for-live-server anymore -->
|
||||
</primary>
|
||||
</replication>
|
||||
</ha-policy>
|
||||
```
|
||||
|
||||
The key difference from classic replication is that if `master` cannot reach any
|
||||
live server with its nodeID, it activates unilaterally.
|
||||
With `primary`, the responsibilities of coordination are delegated to the quorum provider,
|
||||
there are no unilateral decisions. The `primary` will only activate when
|
||||
it knows that it has the most up to date version of the journal identified by its nodeID.
|
||||
|
||||
In short: a started `primary` cannot become live without consensus.
|
||||
|
||||
> **Warning for classic replication**
|
||||
>
|
||||
> Be aware that if you restart a live server after failover has
|
||||
> occurred then `check-for-live-server` must be set to `true`. If not the live server
|
||||
|
|
|
@ -62,6 +62,7 @@ under the License.
|
|||
<module>scale-down</module>
|
||||
<module>stop-server-failover</module>
|
||||
<module>transaction-failover</module>
|
||||
<module>zookeeper-single-pair-failback</module>
|
||||
</modules>
|
||||
</profile>
|
||||
<profile>
|
||||
|
@ -81,6 +82,7 @@ under the License.
|
|||
<module>replicated-transaction-failover</module>
|
||||
<module>scale-down</module>
|
||||
<module>transaction-failover</module>
|
||||
<module>zookeeper-single-pair-failback</module>
|
||||
</modules>
|
||||
</profile>
|
||||
</profiles>
|
||||
|
|
|
@ -0,0 +1,110 @@
|
|||
<?xml version='1.0'?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>org.apache.activemq.examples.failover</groupId>
|
||||
<artifactId>broker-failover</artifactId>
|
||||
<version>2.18.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>zookeeper-single-pair-ordered-failback</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>ActiveMQ Artemis Zookeeper Single Pair Ordered Failback Example</name>
|
||||
|
||||
<properties>
|
||||
<activemq.basedir>${project.basedir}/../../../..</activemq.basedir>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.activemq</groupId>
|
||||
<artifactId>artemis-cli</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>jakarta.jms</groupId>
|
||||
<artifactId>jakarta.jms-api</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.activemq</groupId>
|
||||
<artifactId>artemis-maven-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>create0</id>
|
||||
<goals>
|
||||
<goal>create</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<!-- this makes it easier in certain envs -->
|
||||
<javaOptions>-Djava.net.preferIPv4Stack=true</javaOptions>
|
||||
<instance>${basedir}/target/server0</instance>
|
||||
<configuration>${basedir}/target/classes/activemq/server0</configuration>
|
||||
<javaOptions>-Dudp-address=${udp-address}</javaOptions>
|
||||
</configuration>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>create1</id>
|
||||
<goals>
|
||||
<goal>create</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<!-- this makes it easier in certain envs -->
|
||||
<javaOptions>-Djava.net.preferIPv4Stack=true</javaOptions>
|
||||
<instance>${basedir}/target/server1</instance>
|
||||
<configuration>${basedir}/target/classes/activemq/server1</configuration>
|
||||
<javaOptions>-Dudp-address=${udp-address}</javaOptions>
|
||||
</configuration>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>runClient</id>
|
||||
<goals>
|
||||
<goal>runClient</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<clientClass>org.apache.activemq.artemis.jms.example.ZookeeperSinglePairFailback</clientClass>
|
||||
<args>
|
||||
<param>${basedir}/target/server0</param>
|
||||
<param>${basedir}/target/server1</param>
|
||||
</args>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.activemq.examples.failover</groupId>
|
||||
<artifactId>zookeeper-single-pair-ordered-failback</artifactId>
|
||||
<version>2.18.0-SNAPSHOT</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-clean-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
|
@ -0,0 +1,179 @@
|
|||
# Zookeeper Single Pair Failback Example
|
||||
|
||||
## Configuring Zookeeper with Docker
|
||||
|
||||
This example demonstrates two servers coupled as a primary-backup pair for high availability (HA) using
|
||||
pluggable quorum vote replication Reference Implementation based on [Apache Curator](https://curator.apache.org/) to use
|
||||
[Apache Zookeeper](https://zookeeper.apache.org/) as external quorum service.
|
||||
|
||||
The example shows a client connection failing over from live to backup when the live broker is crashed and
|
||||
then back to the original live when it is restarted (i.e. "failback").
|
||||
|
||||
To run the example, simply type **mvn verify** from this directory after running a Zookeeper node at `localhost:2181`.
|
||||
|
||||
If no Zookeeper node is configured, you can use the commands below (see [Official Zookeeper Docker Image Site](https://hub.docker.com/_/zookeeper)
|
||||
for more details on how to configure it).
|
||||
|
||||
Run Zookeeper `3.6.3` with:
|
||||
```
|
||||
$ docker run --name artemis-zk --network host --restart always -d zookeeper:3.6.3
|
||||
```
|
||||
By default, the official docker image exposes `2181 2888 3888 8080` as client, follower, election and AdminServer ports.
|
||||
|
||||
Verify Zookeeper server is correctly started by running:
|
||||
```
|
||||
$ docker logs --follow artemis-zk
|
||||
```
|
||||
It should print the Zookeeper welcome ASCII logs:
|
||||
```
|
||||
ZooKeeper JMX enabled by default
|
||||
Using config: /conf/zoo.cfg
|
||||
2021-08-05 14:29:29,431 [myid:] - INFO [main:QuorumPeerConfig@174] - Reading configuration from: /conf/zoo.cfg
|
||||
2021-08-05 14:29:29,434 [myid:] - INFO [main:QuorumPeerConfig@451] - clientPort is not set
|
||||
2021-08-05 14:29:29,434 [myid:] - INFO [main:QuorumPeerConfig@464] - secureClientPort is not set
|
||||
2021-08-05 14:29:29,434 [myid:] - INFO [main:QuorumPeerConfig@480] - observerMasterPort is not set
|
||||
2021-08-05 14:29:29,435 [myid:] - INFO [main:QuorumPeerConfig@497] - metricsProvider.className is org.apache.zookeeper.metrics.impl.DefaultMetricsProvider
|
||||
2021-08-05 14:29:29,438 [myid:] - ERROR [main:QuorumPeerConfig@722] - Invalid configuration, only one server specified (ignoring)
|
||||
2021-08-05 14:29:29,441 [myid:1] - INFO [main:DatadirCleanupManager@78] - autopurge.snapRetainCount set to 3
|
||||
2021-08-05 14:29:29,441 [myid:1] - INFO [main:DatadirCleanupManager@79] - autopurge.purgeInterval set to 0
|
||||
2021-08-05 14:29:29,441 [myid:1] - INFO [main:DatadirCleanupManager@101] - Purge task is not scheduled.
|
||||
2021-08-05 14:29:29,441 [myid:1] - WARN [main:QuorumPeerMain@138] - Either no config or no quorum defined in config, running in standalone mode
|
||||
2021-08-05 14:29:29,444 [myid:1] - INFO [main:ManagedUtil@44] - Log4j 1.2 jmx support found and enabled.
|
||||
2021-08-05 14:29:29,449 [myid:1] - INFO [main:QuorumPeerConfig@174] - Reading configuration from: /conf/zoo.cfg
|
||||
2021-08-05 14:29:29,449 [myid:1] - INFO [main:QuorumPeerConfig@451] - clientPort is not set
|
||||
2021-08-05 14:29:29,449 [myid:1] - INFO [main:QuorumPeerConfig@464] - secureClientPort is not set
|
||||
2021-08-05 14:29:29,449 [myid:1] - INFO [main:QuorumPeerConfig@480] - observerMasterPort is not set
|
||||
2021-08-05 14:29:29,450 [myid:1] - INFO [main:QuorumPeerConfig@497] - metricsProvider.className is org.apache.zookeeper.metrics.impl.DefaultMetricsProvider
|
||||
2021-08-05 14:29:29,450 [myid:1] - ERROR [main:QuorumPeerConfig@722] - Invalid configuration, only one server specified (ignoring)
|
||||
2021-08-05 14:29:29,451 [myid:1] - INFO [main:ZooKeeperServerMain@122] - Starting server
|
||||
2021-08-05 14:29:29,459 [myid:1] - INFO [main:ServerMetrics@62] - ServerMetrics initialized with provider org.apache.zookeeper.metrics.impl.DefaultMetricsProvider@525f1e4e
|
||||
2021-08-05 14:29:29,461 [myid:1] - INFO [main:FileTxnSnapLog@124] - zookeeper.snapshot.trust.empty : false
|
||||
2021-08-05 14:29:29,467 [myid:1] - INFO [main:ZookeeperBanner@42] -
|
||||
2021-08-05 14:29:29,467 [myid:1] - INFO [main:ZookeeperBanner@42] - ______ _
|
||||
2021-08-05 14:29:29,467 [myid:1] - INFO [main:ZookeeperBanner@42] - |___ / | |
|
||||
2021-08-05 14:29:29,467 [myid:1] - INFO [main:ZookeeperBanner@42] - / / ___ ___ | | __ ___ ___ _ __ ___ _ __
|
||||
2021-08-05 14:29:29,468 [myid:1] - INFO [main:ZookeeperBanner@42] - / / / _ \ / _ \ | |/ / / _ \ / _ \ | '_ \ / _ \ | '__|
|
||||
2021-08-05 14:29:29,468 [myid:1] - INFO [main:ZookeeperBanner@42] - / /__ | (_) | | (_) | | < | __/ | __/ | |_) | | __/ | |
|
||||
2021-08-05 14:29:29,468 [myid:1] - INFO [main:ZookeeperBanner@42] - /_____| \___/ \___/ |_|\_\ \___| \___| | .__/ \___| |_|
|
||||
2021-08-05 14:29:29,468 [myid:1] - INFO [main:ZookeeperBanner@42] - | |
|
||||
2021-08-05 14:29:29,468 [myid:1] - INFO [main:ZookeeperBanner@42] - |_|
|
||||
2021-08-05 14:29:29,468 [myid:1] - INFO [main:ZookeeperBanner@42] -
|
||||
```
|
||||
Alternatively, this command could be executed:
|
||||
```
|
||||
$ docker run -it --rm --network host zookeeper:3.6.3 zkCli.sh -server localhost:2181
|
||||
```
|
||||
Zookeeper server can be reached using localhost:2181 if it outputs something like:
|
||||
```
|
||||
2021-08-05 14:56:03,739 [myid:localhost:2181] - INFO [main-SendThread(localhost:2181):ClientCnxn$SendThread@1448] - Session establishment complete on server localhost/0:0:0:0:0:0:0:1:2181, session id = 0x100078b8cfc0002, negotiated timeout = 30000
|
||||
|
||||
```
|
||||
Type
|
||||
```
|
||||
[zk: localhost:2181(CONNECTED) 0] quit
|
||||
```
|
||||
to quit the client instance.
|
||||
|
||||
|
||||
## Configuring Zookeeper on bare metal
|
||||
|
||||
It is possible to run Zookeeper in a bare metal instance for this example as well.
|
||||
|
||||
Simply download [Zookeeper](https://zookeeper.apache.org/releases.html), and use the following zoo.cfg under ./apache-zookeeper/conf:
|
||||
|
||||
```shell
|
||||
# The number of milliseconds of each tick
|
||||
tickTime=2000
|
||||
# The number of ticks that the initial
|
||||
# synchronization phase can take
|
||||
initLimit=10
|
||||
# The number of ticks that can pass between
|
||||
# sending a request and getting an acknowledgement
|
||||
syncLimit=5
|
||||
# the directory where the snapshot is stored.
|
||||
# do not use /tmp for storage, /tmp here is just
|
||||
# example sakes.
|
||||
dataDir=/tmp/datazookeeper
|
||||
# the port at which the clients will connect
|
||||
clientPort=2181
|
||||
```
|
||||
|
||||
And use one of the shells to start Zookeeper such as:
|
||||
|
||||
```shell
|
||||
# From the bin folder under the apache-zookeeper distribution folder
|
||||
$ ./zkServer.sh start-foreground
|
||||
```
|
||||
|
||||
And zookeeper would run normally:
|
||||
|
||||
```
|
||||
2021-08-05 14:10:16,902 [myid:] - INFO [main:DigestAuthenticationProvider@47] - ACL digest algorithm is: SHA1
|
||||
2021-08-05 14:10:16,902 [myid:] - INFO [main:DigestAuthenticationProvider@61] - zookeeper.DigestAuthenticationProvider.enabled = true
|
||||
2021-08-05 14:10:16,905 [myid:] - INFO [main:FileTxnSnapLog@124] - zookeeper.snapshot.trust.empty : false
|
||||
2021-08-05 14:10:16,917 [myid:] - INFO [main:ZookeeperBanner@42] -
|
||||
2021-08-05 14:10:16,917 [myid:] - INFO [main:ZookeeperBanner@42] - ______ _
|
||||
2021-08-05 14:10:16,917 [myid:] - INFO [main:ZookeeperBanner@42] - |___ / | |
|
||||
2021-08-05 14:10:16,917 [myid:] - INFO [main:ZookeeperBanner@42] - / / ___ ___ | | __ ___ ___ _ __ ___ _ __
|
||||
2021-08-05 14:10:16,917 [myid:] - INFO [main:ZookeeperBanner@42] - / / / _ \ / _ \ | |/ / / _ \ / _ \ | '_ \ / _ \ | '__|
|
||||
2021-08-05 14:10:16,917 [myid:] - INFO [main:ZookeeperBanner@42] - / /__ | (_) | | (_) | | < | __/ | __/ | |_) | | __/ | |
|
||||
2021-08-05 14:10:16,918 [myid:] - INFO [main:ZookeeperBanner@42] - /_____| \___/ \___/ |_|\_\ \___| \___| | .__/ \___| |_|
|
||||
2021-08-05 14:10:16,918 [myid:] - INFO [main:ZookeeperBanner@42] - | |
|
||||
2021-08-05 14:10:16,918 [myid:] - INFO [main:ZookeeperBanner@42] - |_|
|
||||
2021-08-05 14:10:16,918 [myid:] - INFO [main:ZookeeperBanner@42] -
|
||||
```
|
||||
|
||||
|
||||
## Configuring the brokers
|
||||
|
||||
The 2 brokers of this example are already configured to connect to a single Zookeeper node at the mentioned address, thanks to the XML configuration of their `manager`:
|
||||
```xml
|
||||
<manager>
|
||||
<properties>
|
||||
<property key="connect-string" value="localhost:2181"/>
|
||||
<property key="namespace" value="examples"/>
|
||||
<property key="session-ms" value="18000"/>
|
||||
</properties>
|
||||
</manager>
|
||||
```
|
||||
**NOTE** the `namespace` parameter is used to separate the pair information from others if the Zookeeper node is shared with other applications.
|
||||
|
||||
**WARNING** As already recommended on the [High Availability section](https://activemq.apache.org/components/artemis/documentation/latest/ha.html), a production environment needs >= 3 nodes to protect against network partitions.
|
||||
|
||||
|
||||
## Running the example
|
||||
|
||||
After Zookeeper is started according to any of the steps described here, this example can be run with
|
||||
```shell
|
||||
$ mvn verify
|
||||
```
|
||||
|
||||
```
|
||||
ZookeeperSinglePairFailback-primary-out:2021-08-05 14:15:50,052 INFO [org.apache.activemq.artemis.core.server] AMQ221020: Started KQUEUE Acceptor at localhost:61616 for protocols [CORE,MQTT,AMQP,HORNETQ,STOMP,OPENWIRE]
|
||||
server tcp://localhost:61616 started
|
||||
Started primary
|
||||
Got message: This is text message 20 (redelivered?: false)
|
||||
Got message: This is text message 21 (redelivered?: false)
|
||||
Got message: This is text message 22 (redelivered?: false)
|
||||
Got message: This is text message 23 (redelivered?: false)
|
||||
Got message: This is text message 24 (redelivered?: false)
|
||||
Got message: This is text message 25 (redelivered?: false)
|
||||
Got message: This is text message 26 (redelivered?: false)
|
||||
Got message: This is text message 27 (redelivered?: false)
|
||||
Got message: This is text message 28 (redelivered?: false)
|
||||
Got message: This is text message 29 (redelivered?: false)
|
||||
Acknowledged 3d third of messages
|
||||
**********************************
|
||||
Killing server java.lang.UNIXProcess@dd025d9
|
||||
**********************************
|
||||
**********************************
|
||||
Killing server java.lang.UNIXProcess@3bea478e
|
||||
**********************************
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] BUILD SUCCESS
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
[INFO] Total time: 36.629 s
|
||||
[INFO] Finished at: 2021-08-05T14:15:56-04:00
|
||||
[INFO] ------------------------------------------------------------------------
|
||||
clebertsuconic@MacBook-Pro zookeeper-single-pair-failback %
|
||||
```
|
|
@ -0,0 +1,157 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.jms.example;
|
||||
|
||||
import javax.jms.Connection;
|
||||
import javax.jms.ConnectionFactory;
|
||||
import javax.jms.JMSException;
|
||||
import javax.jms.MessageConsumer;
|
||||
import javax.jms.MessageProducer;
|
||||
import javax.jms.Queue;
|
||||
import javax.jms.Session;
|
||||
import javax.jms.TextMessage;
|
||||
import javax.naming.InitialContext;
|
||||
|
||||
import org.apache.activemq.artemis.util.ServerUtil;
|
||||
|
||||
/**
|
||||
* Example of live and replicating backup pair using Zookeeper as the quorum provider.
|
||||
* <p>
|
||||
* After both servers are started, the live server is killed and the backup becomes active ("fails-over").
|
||||
* <p>
|
||||
* Later the live server is restarted and takes back its position by asking the backup to stop ("fail-back").
|
||||
*/
|
||||
public class ZookeeperSinglePairFailback {
|
||||
|
||||
private static Process server0;
|
||||
|
||||
private static Process server1;
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
// Step 0. Prepare Zookeeper Evironment as shown on readme.md
|
||||
|
||||
final int numMessages = 30;
|
||||
|
||||
Connection connection = null;
|
||||
|
||||
InitialContext initialContext = null;
|
||||
|
||||
try {
|
||||
server0 = ServerUtil.startServer(args[0], ZookeeperSinglePairFailback.class.getSimpleName() + "-primary", 0, 30000);
|
||||
server1 = ServerUtil.startServer(args[1], ZookeeperSinglePairFailback.class.getSimpleName() + "-backup", 1, 10000);
|
||||
|
||||
// Step 2. Get an initial context for looking up JNDI from the server #1
|
||||
initialContext = new InitialContext();
|
||||
|
||||
// Step 3. Look up the JMS resources from JNDI
|
||||
Queue queue = (Queue) initialContext.lookup("queue/exampleQueue");
|
||||
ConnectionFactory connectionFactory = (ConnectionFactory) initialContext.lookup("ConnectionFactory");
|
||||
|
||||
// Step 4. Create a JMS Connection
|
||||
connection = connectionFactory.createConnection();
|
||||
|
||||
// Step 5. Create a *non-transacted* JMS Session with client acknowledgement
|
||||
Session session = connection.createSession(false, Session.CLIENT_ACKNOWLEDGE);
|
||||
|
||||
// Step 6. Start the connection to ensure delivery occurs
|
||||
connection.start();
|
||||
|
||||
// Step 7. Create a JMS MessageProducer and a MessageConsumer
|
||||
MessageProducer producer = session.createProducer(queue);
|
||||
MessageConsumer consumer = session.createConsumer(queue);
|
||||
|
||||
// Step 8. Send some messages to server #1, the live server
|
||||
for (int i = 0; i < numMessages; i++) {
|
||||
TextMessage message = session.createTextMessage("This is text message " + i);
|
||||
producer.send(message);
|
||||
System.out.println("Sent message: " + message.getText());
|
||||
}
|
||||
|
||||
// Step 9. Receive and acknowledge a third of the sent messages
|
||||
TextMessage message0 = null;
|
||||
for (int i = 0; i < numMessages / 3; i++) {
|
||||
message0 = (TextMessage) consumer.receive(5000);
|
||||
System.out.println("Got message: " + message0.getText());
|
||||
}
|
||||
message0.acknowledge();
|
||||
System.out.println("Received and acknowledged a third of the sent messages");
|
||||
|
||||
// Step 10. Receive the rest third of the sent messages but *do not* acknowledge them yet
|
||||
for (int i = numMessages / 3; i < numMessages; i++) {
|
||||
message0 = (TextMessage) consumer.receive(5000);
|
||||
System.out.println("Got message: " + message0.getText());
|
||||
}
|
||||
System.out.println("Received without acknowledged the rest of the sent messages");
|
||||
|
||||
Thread.sleep(2000);
|
||||
// Step 11. Crash server #0, the live server, and wait a little while to make sure
|
||||
// it has really crashed
|
||||
ServerUtil.killServer(server0);
|
||||
System.out.println("Killed primary");
|
||||
|
||||
Thread.sleep(2000);
|
||||
|
||||
// Step 12. Acknowledging the received messages will fail as failover to the backup server has occurred
|
||||
try {
|
||||
message0.acknowledge();
|
||||
} catch (JMSException e) {
|
||||
System.out.println("Got (the expected) exception while acknowledging message: " + e.getMessage());
|
||||
}
|
||||
|
||||
// Step 13. Consume again the 2nd third of the messages again. Note that they are not considered as redelivered.
|
||||
for (int i = numMessages / 3; i < (numMessages / 3) * 2; i++) {
|
||||
message0 = (TextMessage) consumer.receive(5000);
|
||||
System.out.printf("Got message: %s (redelivered?: %s)\n", message0.getText(), message0.getJMSRedelivered());
|
||||
}
|
||||
|
||||
// Step 14. Acknowledging them on the failed-over broker works fine
|
||||
message0.acknowledge();
|
||||
System.out.println("Acknowledged 2n third of messages");
|
||||
|
||||
// Step 15. Restarting primary
|
||||
server0 = ServerUtil.startServer(args[0], ZookeeperSinglePairFailback.class.getSimpleName() + "-primary", 0, 10000);
|
||||
System.out.println("Started primary");
|
||||
|
||||
// await fail-back to complete
|
||||
Thread.sleep(4000);
|
||||
|
||||
// Step 16. Consuming the 3rd third of the messages. Note that they are not considered as redelivered.
|
||||
for (int i = (numMessages / 3) * 2; i < numMessages; i++) {
|
||||
message0 = (TextMessage) consumer.receive(5000);
|
||||
System.out.printf("Got message: %s (redelivered?: %s)\n", message0.getText(), message0.getJMSRedelivered());
|
||||
}
|
||||
message0.acknowledge();
|
||||
System.out.println("Acknowledged 3d third of messages");
|
||||
|
||||
} finally {
|
||||
// Step 17. Be sure to close our resources!
|
||||
|
||||
if (connection != null) {
|
||||
connection.close();
|
||||
}
|
||||
|
||||
if (initialContext != null) {
|
||||
initialContext.close();
|
||||
}
|
||||
|
||||
ServerUtil.killServer(server0);
|
||||
ServerUtil.killServer(server1);
|
||||
|
||||
// Step 18. stop the ZK server
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<configuration xmlns="urn:activemq" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:activemq /schema/artemis-configuration.xsd">
|
||||
<core xmlns="urn:activemq:core">
|
||||
|
||||
<bindings-directory>./data/bindings</bindings-directory>
|
||||
|
||||
<journal-directory>./data/journal</journal-directory>
|
||||
|
||||
<large-messages-directory>./data/largemessages</large-messages-directory>
|
||||
|
||||
<paging-directory>./data/paging</paging-directory>
|
||||
|
||||
<cluster-user>exampleUser</cluster-user>
|
||||
|
||||
<cluster-password>secret</cluster-password>
|
||||
|
||||
<ha-policy>
|
||||
<replication>
|
||||
<primary>
|
||||
<manager>
|
||||
<properties>
|
||||
<property key="connect-string" value="localhost:2181"/>
|
||||
<property key="namespace" value="examples"/>
|
||||
<property key="session-ms" value="18000"/>
|
||||
</properties>
|
||||
</manager>
|
||||
</primary>
|
||||
</replication>
|
||||
</ha-policy>
|
||||
|
||||
<connectors>
|
||||
<connector name="netty-connector">tcp://localhost:61616</connector>
|
||||
<connector name="netty-backup-connector">tcp://localhost:61617</connector>
|
||||
</connectors>
|
||||
|
||||
<!-- Acceptors -->
|
||||
<acceptors>
|
||||
<acceptor name="netty-acceptor">tcp://localhost:61616</acceptor>
|
||||
</acceptors>
|
||||
|
||||
<cluster-connections>
|
||||
<cluster-connection name="my-cluster">
|
||||
<connector-ref>netty-connector</connector-ref>
|
||||
<static-connectors>
|
||||
<connector-ref>netty-backup-connector</connector-ref>
|
||||
</static-connectors>
|
||||
</cluster-connection>
|
||||
</cluster-connections>
|
||||
|
||||
<!-- Other config -->
|
||||
|
||||
<security-settings>
|
||||
<!--security for example queue-->
|
||||
<security-setting match="exampleQueue">
|
||||
<permission roles="guest" type="createDurableQueue"/>
|
||||
<permission roles="guest" type="deleteDurableQueue"/>
|
||||
<permission roles="guest" type="createNonDurableQueue"/>
|
||||
<permission roles="guest" type="deleteNonDurableQueue"/>
|
||||
<permission roles="guest" type="consume"/>
|
||||
<permission roles="guest" type="send"/>
|
||||
</security-setting>
|
||||
</security-settings>
|
||||
|
||||
<addresses>
|
||||
<address name="exampleQueue">
|
||||
<anycast>
|
||||
<queue name="exampleQueue"/>
|
||||
</anycast>
|
||||
</address>
|
||||
</addresses>
|
||||
</core>
|
||||
</configuration>
|
|
@ -0,0 +1,91 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<configuration xmlns="urn:activemq" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:activemq /schema/artemis-configuration.xsd">
|
||||
<core xmlns="urn:activemq:core">
|
||||
|
||||
<bindings-directory>./data/bindings</bindings-directory>
|
||||
|
||||
<journal-directory>./data/journal</journal-directory>
|
||||
|
||||
<large-messages-directory>./data/largemessages</large-messages-directory>
|
||||
|
||||
<paging-directory>./data/paging</paging-directory>
|
||||
|
||||
<cluster-user>exampleUser</cluster-user>
|
||||
|
||||
<cluster-password>secret</cluster-password>
|
||||
|
||||
<ha-policy>
|
||||
<replication>
|
||||
<backup>
|
||||
<manager>
|
||||
<properties>
|
||||
<property key="connect-string" value="localhost:2181"/>
|
||||
<property key="namespace" value="examples"/>
|
||||
<property key="session-ms" value="18000"/>
|
||||
</properties>
|
||||
</manager>
|
||||
<allow-failback>true</allow-failback>
|
||||
</backup>
|
||||
</replication>
|
||||
</ha-policy>
|
||||
|
||||
<!-- Connectors -->
|
||||
<connectors>
|
||||
<connector name="netty-live-connector">tcp://localhost:61616</connector>
|
||||
<connector name="netty-connector">tcp://localhost:61617</connector>
|
||||
</connectors>
|
||||
|
||||
<!-- Acceptors -->
|
||||
<acceptors>
|
||||
<acceptor name="netty-acceptor">tcp://localhost:61617</acceptor>
|
||||
</acceptors>
|
||||
|
||||
<cluster-connections>
|
||||
<cluster-connection name="my-cluster">
|
||||
<connector-ref>netty-connector</connector-ref>
|
||||
<static-connectors>
|
||||
<connector-ref>netty-live-connector</connector-ref>
|
||||
</static-connectors>
|
||||
</cluster-connection>
|
||||
</cluster-connections>
|
||||
<!-- Other config -->
|
||||
|
||||
<security-settings>
|
||||
<!--security for example queue-->
|
||||
<security-setting match="exampleQueue">
|
||||
<permission roles="guest" type="createDurableQueue"/>
|
||||
<permission roles="guest" type="deleteDurableQueue"/>
|
||||
<permission roles="guest" type="createNonDurableQueue"/>
|
||||
<permission roles="guest" type="deleteNonDurableQueue"/>
|
||||
<permission roles="guest" type="consume"/>
|
||||
<permission roles="guest" type="send"/>
|
||||
</security-setting>
|
||||
</security-settings>
|
||||
|
||||
<addresses>
|
||||
<address name="exampleQueue">
|
||||
<anycast>
|
||||
<queue name="exampleQueue"/>
|
||||
</anycast>
|
||||
</address>
|
||||
</addresses>
|
||||
</core>
|
||||
</configuration>
|
|
@ -0,0 +1,20 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
java.naming.factory.initial=org.apache.activemq.artemis.jndi.ActiveMQInitialContextFactory
|
||||
connectionFactory.ConnectionFactory=tcp://localhost:61616?ha=true&retryInterval=1000&retryIntervalMultiplier=1.0&reconnectAttempts=-1
|
||||
queue.queue/exampleQueue=exampleQueue
|
31
pom.xml
31
pom.xml
|
@ -64,6 +64,8 @@
|
|||
<module>artemis-distribution</module>
|
||||
<module>tests</module>
|
||||
<module>artemis-features</module>
|
||||
<module>artemis-quorum-api</module>
|
||||
<module>artemis-quorum-ri</module>
|
||||
</modules>
|
||||
|
||||
<name>ActiveMQ Artemis Parent</name>
|
||||
|
@ -105,6 +107,9 @@
|
|||
<mockito.version>3.11.2</mockito.version>
|
||||
<jctools.version>2.1.2</jctools.version>
|
||||
<netty.version>4.1.66.Final</netty.version>
|
||||
<curator.version>5.1.0</curator.version>
|
||||
<!-- ZooKeeper version is set explicitly while waiting for the https://issues.apache.org/jira/browse/CURATOR-595 fix -->
|
||||
<zookeeper.version>3.6.3</zookeeper.version>
|
||||
|
||||
<!-- this is basically for tests -->
|
||||
<netty-tcnative-version>2.0.40.Final</netty-tcnative-version>
|
||||
|
@ -851,6 +856,32 @@
|
|||
<artifactId>jakarta.security.auth.message-api</artifactId>
|
||||
<version>${jakarta.security.auth.message-api.version}</version>
|
||||
</dependency>
|
||||
<!-- Curator Zookeeper RI -->
|
||||
<dependency>
|
||||
<groupId>org.apache.curator</groupId>
|
||||
<artifactId>curator-recipes</artifactId>
|
||||
<version>${curator.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.curator</groupId>
|
||||
<artifactId>curator-framework</artifactId>
|
||||
<version>${curator.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.curator</groupId>
|
||||
<artifactId>curator-client</artifactId>
|
||||
<version>${curator.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.zookeeper</groupId>
|
||||
<artifactId>zookeeper</artifactId>
|
||||
<version>${zookeeper.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.zookeeper</groupId>
|
||||
<artifactId>zookeeper-jute</artifactId>
|
||||
<version>${zookeeper.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</dependencyManagement>
|
||||
|
|
|
@ -44,9 +44,9 @@ public class ScaleDownFailoverTest extends ClusterTestBase {
|
|||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
stopCount = 0;
|
||||
setupLiveServer(0, isFileStorage(), false, isNetty(), true);
|
||||
setupLiveServer(1, isFileStorage(), false, isNetty(), true);
|
||||
setupLiveServer(2, isFileStorage(), false, isNetty(), true);
|
||||
setupLiveServer(0, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
|
||||
setupLiveServer(1, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
|
||||
setupLiveServer(2, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
|
||||
ScaleDownConfiguration scaleDownConfiguration = new ScaleDownConfiguration();
|
||||
ScaleDownConfiguration scaleDownConfiguration2 = new ScaleDownConfiguration();
|
||||
scaleDownConfiguration2.setEnabled(false);
|
||||
|
|
|
@ -35,8 +35,8 @@ public class ScaleDownFailureTest extends ClusterTestBase {
|
|||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
setupLiveServer(0, isFileStorage(), false, isNetty(), true);
|
||||
setupLiveServer(1, isFileStorage(), false, isNetty(), true);
|
||||
setupLiveServer(0, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
|
||||
setupLiveServer(1, isFileStorage(), HAType.SharedNothingReplication, isNetty(), true);
|
||||
if (isGrouped()) {
|
||||
ScaleDownConfiguration scaleDownConfiguration = new ScaleDownConfiguration();
|
||||
scaleDownConfiguration.setGroupName("bill");
|
||||
|
|
|
@ -51,6 +51,12 @@
|
|||
<scope>test</scope>
|
||||
<type>test-jar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.activemq</groupId>
|
||||
<artifactId>artemis-quorum-ri</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.activemq.tests</groupId>
|
||||
<artifactId>unit-tests</artifactId>
|
||||
|
|
|
@ -77,7 +77,6 @@ public class InfiniteRedeliveryTest extends ActiveMQTestBase {
|
|||
|
||||
Configuration backupConfig;
|
||||
Configuration liveConfig;
|
||||
NodeManager nodeManager;
|
||||
|
||||
protected TestableServer createTestableServer(Configuration config, NodeManager nodeManager) throws Exception {
|
||||
boolean isBackup = config.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration || config.getHAPolicyConfiguration() instanceof SharedStoreSlavePolicyConfiguration;
|
||||
|
@ -93,20 +92,25 @@ public class InfiniteRedeliveryTest extends ActiveMQTestBase {
|
|||
backupConfig = createDefaultConfig(0, true);
|
||||
liveConfig = createDefaultConfig(0, true);
|
||||
|
||||
ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null);
|
||||
configureReplicationPair(backupConnector, backupAcceptor, liveConnector);
|
||||
|
||||
backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false);
|
||||
|
||||
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true);
|
||||
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
|
||||
|
||||
nodeManager = new InVMNodeManager(true, backupConfig.getJournalLocation());
|
||||
|
||||
backupServer = createTestableServer(backupConfig, nodeManager);
|
||||
backupServer = createTestableServer(backupConfig, new InVMNodeManager(true, backupConfig.getJournalLocation()));
|
||||
|
||||
liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(TransportConfigurationUtils.getNettyAcceptor(true, 0));
|
||||
|
||||
liveServer = createTestableServer(liveConfig, nodeManager);
|
||||
liveServer = createTestableServer(liveConfig, new InVMNodeManager(false, liveConfig.getJournalLocation()));
|
||||
}
|
||||
|
||||
/**
 * Configures {@code backupConfig} and {@code liveConfig} as a replication pair.
 * <p>
 * Delegates to {@link ReplicatedBackupUtils#configureReplicationPair} and then tunes the backup's
 * {@link ReplicaPolicyConfiguration}. Declared {@code protected} so that subclasses can plug in a
 * different replication setup.
 *
 * @param backupConnector connector configuration of the backup server
 * @param backupAcceptor  acceptor configuration of the backup server
 * @param liveConnector   connector configuration of the live server
 */
protected void configureReplicationPair(TransportConfiguration backupConnector,
|
||||
TransportConfiguration backupAcceptor,
|
||||
TransportConfiguration liveConnector) {
|
||||
// The last argument (null) is passed through unchanged to the helper.
ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null);
|
||||
// Backup policy: max saved replicated journals size set to -1 and fail-back allowed.
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true);
|
||||
// Backup policy: restart-backup switched off.
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.activemq.artemis.tests.integration.client;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
import org.apache.activemq.artemis.api.core.TransportConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
|
||||
import org.apache.activemq.artemis.tests.util.ReplicatedBackupUtils;
|
||||
import org.junit.Before;
|
||||
|
||||
/**
 * Variant of {@link InfiniteRedeliveryTest} that sets up the live/backup pair through
 * {@link ReplicatedBackupUtils#configurePluggableQuorumReplicationPair}, using a
 * {@link FileBasedPrimitiveManager} whose {@code locks-folder} is a fresh temporary folder.
 */
public class PluggableQuorumInfiniteRedeliveryTest extends InfiniteRedeliveryTest {
|
||||
|
||||
// Manager configuration shared by the live and the backup server; created in setUp().
private DistributedPrimitiveManagerConfiguration managerConfiguration;
|
||||
|
||||
/**
 * Forwards both parameters unchanged to the {@link InfiniteRedeliveryTest} constructor.
 */
public PluggableQuorumInfiniteRedeliveryTest(String protocol, boolean useCLI) {
|
||||
super(protocol, useCLI);
|
||||
}
|
||||
|
||||
/**
 * Runs the parent set-up and then creates the manager configuration, pointing
 * {@code locks-folder} at a new "manager" folder under {@code temporaryFolder}.
 */
@Before
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
// NOTE(review): managerConfiguration is assigned only after super.setUp() returns;
// confirm that configureReplicationPair is not invoked from within super.setUp().
super.setUp();
|
||||
this.managerConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(),
|
||||
Collections.singletonMap("locks-folder", temporaryFolder.newFolder("manager").toString()));
|
||||
}
|
||||
|
||||
/**
 * Configures the pair with the pluggable-quorum helper instead of the parent's replication helper.
 * The same {@code managerConfiguration} instance is passed for both manager arguments.
 */
@Override
|
||||
protected void configureReplicationPair(TransportConfiguration backupConnector,
|
||||
TransportConfiguration backupAcceptor,
|
||||
TransportConfiguration liveConnector) {
|
||||
|
||||
ReplicatedBackupUtils.configurePluggableQuorumReplicationPair(backupConfig, backupConnector, backupAcceptor,
|
||||
liveConfig, liveConnector, null,
|
||||
managerConfiguration, managerConfiguration);
|
||||
// Backup policy: max saved replicated journals size set to -1 and fail-back allowed.
((ReplicationBackupPolicyConfiguration) backupConfig.getHAPolicyConfiguration())
|
||||
.setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true);
|
||||
}
|
||||
}
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.activemq.artemis.tests.integration.cluster.distribution;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.net.URI;
|
||||
|
@ -56,9 +57,12 @@ import org.apache.activemq.artemis.core.client.impl.TopologyMemberImpl;
|
|||
import org.apache.activemq.artemis.core.config.ClusterConnectionConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.Configuration;
|
||||
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.LiveOnlyPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.SharedStoreMasterPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.postoffice.Binding;
|
||||
|
@ -85,6 +89,7 @@ import org.apache.activemq.artemis.core.server.group.GroupingHandler;
|
|||
import org.apache.activemq.artemis.core.server.group.impl.GroupingHandlerConfiguration;
|
||||
import org.apache.activemq.artemis.core.server.impl.AddressInfo;
|
||||
import org.apache.activemq.artemis.core.server.impl.InVMNodeManager;
|
||||
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
|
||||
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
|
||||
import org.apache.activemq.artemis.utils.PortCheckRule;
|
||||
import org.jboss.logging.Logger;
|
||||
|
@ -134,6 +139,21 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
|
|||
return true;
|
||||
}
|
||||
|
||||
private DistributedPrimitiveManagerConfiguration pluggableQuorumConfiguration = null;
|
||||
|
||||
/**
 * Lazily creates, and afterwards reuses, the {@link DistributedPrimitiveManagerConfiguration}
 * stored in {@code pluggableQuorumConfiguration}.
 * <p>
 * The configuration names {@link FileBasedPrimitiveManager} as the manager class and sets
 * {@code locks-folder} to a new "manager" folder created under {@code temporaryFolder}.
 *
 * @return the cached or newly created configuration, or {@code null} if the folder could not be created
 */
private DistributedPrimitiveManagerConfiguration getOrCreatePluggableQuorumConfiguration() {
|
||||
// Reuse the configuration created by an earlier call.
if (pluggableQuorumConfiguration != null) {
|
||||
return pluggableQuorumConfiguration;
|
||||
}
|
||||
try {
|
||||
pluggableQuorumConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), Collections.singletonMap("locks-folder", temporaryFolder.newFolder("manager").toString()));
|
||||
} catch (IOException ioException) {
|
||||
// NOTE(review): the failure is only logged and null is returned; the callers in this class
// hand the result straight to setDistributedManagerConfiguration(...) - confirm this is intended.
log.error(ioException);
|
||||
return null;
|
||||
}
|
||||
return pluggableQuorumConfiguration;
|
||||
}
|
||||
|
||||
@Override
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
|
@ -159,11 +179,19 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
|
|||
|
||||
}
|
||||
|
||||
/**
 * High-availability flavour used when setting up live and backup servers in cluster tests.
 * Each constant is mapped to a concrete HA policy configuration by the
 * {@code haPolicyLiveConfiguration} / {@code haPolicyBackupConfiguration} helpers of this class.
 */
public enum HAType {
|
||||
SharedStore, SharedNothingReplication, PluggableQuorumReplication
|
||||
}
|
||||
|
||||
/**
 * HA flavour used by this test class; overridable by subclasses.
 *
 * @return {@link HAType#SharedNothingReplication} in this base implementation
 */
protected HAType haType() {
|
||||
return HAType.SharedNothingReplication;
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether the servers share the storage or not.
|
||||
*/
|
||||
protected boolean isSharedStore() {
|
||||
return false;
|
||||
protected final boolean isSharedStore() {
|
||||
return HAType.SharedStore.equals(haType());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1481,14 +1509,14 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
|
|||
}
|
||||
|
||||
protected void setupServer(final int node, final boolean fileStorage, final boolean netty) throws Exception {
|
||||
setupLiveServer(node, fileStorage, false, netty, false);
|
||||
setupLiveServer(node, fileStorage, HAType.SharedNothingReplication, netty, false);
|
||||
}
|
||||
|
||||
protected void setupLiveServer(final int node,
|
||||
final boolean fileStorage,
|
||||
final boolean netty,
|
||||
boolean isLive) throws Exception {
|
||||
setupLiveServer(node, fileStorage, false, netty, isLive);
|
||||
setupLiveServer(node, fileStorage, HAType.SharedNothingReplication, netty, isLive);
|
||||
}
|
||||
|
||||
protected boolean isResolveProtocols() {
|
||||
|
@ -1497,27 +1525,26 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
|
|||
|
||||
protected void setupLiveServer(final int node,
|
||||
final boolean fileStorage,
|
||||
final boolean sharedStorage,
|
||||
final HAType haType,
|
||||
final boolean netty,
|
||||
boolean liveOnly) throws Exception {
|
||||
if (servers[node] != null) {
|
||||
throw new IllegalArgumentException("Already a server at node " + node);
|
||||
}
|
||||
|
||||
HAPolicyConfiguration haPolicyConfiguration = null;
|
||||
final HAPolicyConfiguration haPolicyConfiguration;
|
||||
if (liveOnly) {
|
||||
haPolicyConfiguration = new LiveOnlyPolicyConfiguration();
|
||||
} else {
|
||||
if (sharedStorage)
|
||||
haPolicyConfiguration = new SharedStoreMasterPolicyConfiguration();
|
||||
else
|
||||
haPolicyConfiguration = new ReplicatedPolicyConfiguration();
|
||||
haPolicyConfiguration = haPolicyLiveConfiguration(haType);
|
||||
}
|
||||
|
||||
Configuration configuration = createBasicConfig(node).setJournalMaxIO_AIO(1000).setThreadPoolMaxSize(10).clearAcceptorConfigurations().addAcceptorConfiguration(createTransportConfiguration(netty, true, generateParams(node, netty))).setHAPolicyConfiguration(haPolicyConfiguration).setResolveProtocols(isResolveProtocols());
|
||||
|
||||
ActiveMQServer server;
|
||||
|
||||
final boolean sharedStorage = HAType.SharedStore.equals(haType);
|
||||
|
||||
if (fileStorage) {
|
||||
if (sharedStorage) {
|
||||
server = createInVMFailoverServer(true, configuration, nodeManagers[node], node);
|
||||
|
@ -1538,6 +1565,20 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
|
|||
servers[node] = addServer(server);
|
||||
}
|
||||
|
||||
/**
 * Creates the HA policy configuration for a live server of the given {@link HAType}.
 *
 * @param haType the HA flavour to configure
 * @return a new {@link SharedStoreMasterPolicyConfiguration}, a new {@link ReplicatedPolicyConfiguration},
 *         or a default {@link ReplicationPrimaryPolicyConfiguration} carrying the pluggable quorum
 *         manager configuration
 * @throws AssertionError if {@code haType} is not one of the handled constants
 */
private HAPolicyConfiguration haPolicyLiveConfiguration(HAType haType) {
|
||||
switch (haType) {
|
||||
case SharedStore:
|
||||
return new SharedStoreMasterPolicyConfiguration();
|
||||
case SharedNothingReplication:
|
||||
return new ReplicatedPolicyConfiguration();
|
||||
case PluggableQuorumReplication:
|
||||
// The manager configuration is created on first use and shared with later calls;
// it can be null if its temporary folder could not be created.
return ReplicationPrimaryPolicyConfiguration.withDefault()
|
||||
.setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration());
|
||||
default:
|
||||
throw new AssertionError("Unsupported haType = " + haType);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Server lacks a {@link ClusterConnectionConfiguration} necessary for the remote (replicating)
|
||||
* backup case.
|
||||
|
@ -1549,14 +1590,14 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
|
|||
* @param node
|
||||
* @param liveNode
|
||||
* @param fileStorage
|
||||
* @param sharedStorage
|
||||
* @param haType
|
||||
* @param netty
|
||||
* @throws Exception
|
||||
*/
|
||||
protected void setupBackupServer(final int node,
|
||||
final int liveNode,
|
||||
final boolean fileStorage,
|
||||
final boolean sharedStorage,
|
||||
final HAType haType,
|
||||
final boolean netty) throws Exception {
|
||||
if (servers[node] != null) {
|
||||
throw new IllegalArgumentException("Already a server at node " + node);
|
||||
|
@ -1566,7 +1607,9 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
|
|||
TransportConfiguration backupConfig = createTransportConfiguration(netty, false, generateParams(node, netty));
|
||||
TransportConfiguration acceptorConfig = createTransportConfiguration(netty, true, generateParams(node, netty));
|
||||
|
||||
Configuration configuration = createBasicConfig(sharedStorage ? liveNode : node).clearAcceptorConfigurations().addAcceptorConfiguration(acceptorConfig).addConnectorConfiguration(liveConfig.getName(), liveConfig).addConnectorConfiguration(backupConfig.getName(), backupConfig).setHAPolicyConfiguration(sharedStorage ? new SharedStoreSlavePolicyConfiguration() : new ReplicaPolicyConfiguration());
|
||||
final boolean sharedStorage = HAType.SharedStore.equals(haType);
|
||||
|
||||
Configuration configuration = createBasicConfig(sharedStorage ? liveNode : node).clearAcceptorConfigurations().addAcceptorConfiguration(acceptorConfig).addConnectorConfiguration(liveConfig.getName(), liveConfig).addConnectorConfiguration(backupConfig.getName(), backupConfig).setHAPolicyConfiguration(haPolicyBackupConfiguration(haType));
|
||||
|
||||
ActiveMQServer server;
|
||||
|
||||
|
@ -1580,6 +1623,21 @@ public abstract class ClusterTestBase extends ActiveMQTestBase {
|
|||
servers[node] = addServer(server);
|
||||
}
|
||||
|
||||
/**
 * Creates the HA policy configuration for a backup server of the given {@link HAType}.
 *
 * @param haType the HA flavour to configure
 * @return a new {@link SharedStoreSlavePolicyConfiguration}, a new {@link ReplicaPolicyConfiguration},
 *         or a default {@link ReplicationBackupPolicyConfiguration} carrying the pluggable quorum
 *         manager configuration
 * @throws AssertionError if {@code haType} is not one of the handled constants
 */
private HAPolicyConfiguration haPolicyBackupConfiguration(HAType haType) {
|
||||
switch (haType) {
|
||||
|
||||
case SharedStore:
|
||||
return new SharedStoreSlavePolicyConfiguration();
|
||||
case SharedNothingReplication:
|
||||
return new ReplicaPolicyConfiguration();
|
||||
case PluggableQuorumReplication:
|
||||
// The manager configuration is created on first use and shared with later calls;
// it can be null if its temporary folder could not be created.
return ReplicationBackupPolicyConfiguration.withDefault()
|
||||
.setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration());
|
||||
default:
|
||||
throw new AssertionError("Unsupported ha type = " + haType);
|
||||
}
|
||||
}
|
||||
|
||||
protected void setupLiveServerWithDiscovery(final int node,
|
||||
final String groupAddress,
|
||||
final int port,
|
||||
|
|
|
@ -87,14 +87,14 @@ public class ClusterWithBackupTest extends ClusterTestBase {
|
|||
|
||||
protected void setupServers() throws Exception {
|
||||
// The backups
|
||||
setupBackupServer(0, 3, isFileStorage(), true, isNetty());
|
||||
setupBackupServer(1, 4, isFileStorage(), true, isNetty());
|
||||
setupBackupServer(2, 5, isFileStorage(), true, isNetty());
|
||||
setupBackupServer(0, 3, isFileStorage(), HAType.SharedStore, isNetty());
|
||||
setupBackupServer(1, 4, isFileStorage(), HAType.SharedStore, isNetty());
|
||||
setupBackupServer(2, 5, isFileStorage(), HAType.SharedStore, isNetty());
|
||||
|
||||
// The lives
|
||||
setupLiveServer(3, isFileStorage(), true, isNetty(), false);
|
||||
setupLiveServer(4, isFileStorage(), true, isNetty(), false);
|
||||
setupLiveServer(5, isFileStorage(), true, isNetty(), false);
|
||||
setupLiveServer(3, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
setupLiveServer(4, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
setupLiveServer(5, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,14 +46,14 @@ public class SimpleSymmetricClusterTest extends ClusterTestBase {
|
|||
@Test
|
||||
public void testSimpleWithBackup() throws Exception {
|
||||
// The backups
|
||||
setupBackupServer(0, 3, isFileStorage(), true, isNetty());
|
||||
setupBackupServer(1, 4, isFileStorage(), true, isNetty());
|
||||
setupBackupServer(2, 5, isFileStorage(), true, isNetty());
|
||||
setupBackupServer(0, 3, isFileStorage(), HAType.SharedStore, isNetty());
|
||||
setupBackupServer(1, 4, isFileStorage(), HAType.SharedStore, isNetty());
|
||||
setupBackupServer(2, 5, isFileStorage(), HAType.SharedStore, isNetty());
|
||||
|
||||
// The lives
|
||||
setupLiveServer(3, isFileStorage(), true, isNetty(), false);
|
||||
setupLiveServer(4, isFileStorage(), true, isNetty(), false);
|
||||
setupLiveServer(5, isFileStorage(), true, isNetty(), false);
|
||||
setupLiveServer(3, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
setupLiveServer(4, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
setupLiveServer(5, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
|
||||
setupClusterConnection("cluster0", "queues", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 3, 4, 5);
|
||||
|
||||
|
|
|
@ -453,18 +453,18 @@ public class SymmetricClusterWithBackupTest extends SymmetricClusterTest {
|
|||
@Override
|
||||
protected void setupServers() throws Exception {
|
||||
// The backups
|
||||
setupBackupServer(5, 0, isFileStorage(), true, isNetty());
|
||||
setupBackupServer(6, 1, isFileStorage(), true, isNetty());
|
||||
setupBackupServer(7, 2, isFileStorage(), true, isNetty());
|
||||
setupBackupServer(8, 3, isFileStorage(), true, isNetty());
|
||||
setupBackupServer(9, 4, isFileStorage(), true, isNetty());
|
||||
setupBackupServer(5, 0, isFileStorage(), HAType.SharedStore, isNetty());
|
||||
setupBackupServer(6, 1, isFileStorage(), HAType.SharedStore, isNetty());
|
||||
setupBackupServer(7, 2, isFileStorage(), HAType.SharedStore, isNetty());
|
||||
setupBackupServer(8, 3, isFileStorage(), HAType.SharedStore, isNetty());
|
||||
setupBackupServer(9, 4, isFileStorage(), HAType.SharedStore, isNetty());
|
||||
|
||||
// The lives
|
||||
setupLiveServer(0, isFileStorage(), true, isNetty(), false);
|
||||
setupLiveServer(1, isFileStorage(), true, isNetty(), false);
|
||||
setupLiveServer(2, isFileStorage(), true, isNetty(), false);
|
||||
setupLiveServer(3, isFileStorage(), true, isNetty(), false);
|
||||
setupLiveServer(4, isFileStorage(), true, isNetty(), false);
|
||||
setupLiveServer(0, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
setupLiveServer(1, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
setupLiveServer(2, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
setupLiveServer(3, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
setupLiveServer(4, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -60,6 +60,8 @@ import org.apache.activemq.artemis.core.server.cluster.ha.BackupPolicy;
|
|||
import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicaPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicatedPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationBackupPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicationPrimaryPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreMasterPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.SharedStoreSlavePolicy;
|
||||
import org.apache.activemq.artemis.core.server.files.FileMoveManager;
|
||||
|
@ -657,6 +659,8 @@ public class FailoverTest extends FailoverTestBase {
|
|||
|
||||
backupServer.getServer().fail(true);
|
||||
|
||||
decrementActivationSequenceForForceRestartOf(liveServer);
|
||||
|
||||
liveServer.start();
|
||||
|
||||
consumer.close();
|
||||
|
@ -786,7 +790,7 @@ public class FailoverTest extends FailoverTestBase {
|
|||
((ReplicaPolicy) haPolicy).setMaxSavedReplicatedJournalsSize(1);
|
||||
}
|
||||
|
||||
simpleFailover(haPolicy instanceof ReplicaPolicy, doFailBack);
|
||||
simpleFailover(haPolicy instanceof ReplicaPolicy || haPolicy instanceof ReplicationBackupPolicy, doFailBack);
|
||||
}
|
||||
|
||||
@Test(timeout = 120000)
|
||||
|
@ -816,9 +820,12 @@ public class FailoverTest extends FailoverTestBase {
|
|||
Thread.sleep(100);
|
||||
Assert.assertFalse("backup is not running", backupServer.isStarted());
|
||||
|
||||
Assert.assertFalse("must NOT be a backup", liveServer.getServer().getHAPolicy() instanceof BackupPolicy);
|
||||
final boolean isBackup = liveServer.getServer().getHAPolicy() instanceof BackupPolicy ||
|
||||
liveServer.getServer().getHAPolicy() instanceof ReplicationBackupPolicy;
|
||||
Assert.assertFalse("must NOT be a backup", isBackup);
|
||||
adaptLiveConfigForReplicatedFailBack(liveServer);
|
||||
beforeRestart(liveServer);
|
||||
decrementActivationSequenceForForceRestartOf(liveServer);
|
||||
liveServer.start();
|
||||
Assert.assertTrue("live initialized...", liveServer.getServer().waitForActivation(15, TimeUnit.SECONDS));
|
||||
|
||||
|
@ -827,7 +834,8 @@ public class FailoverTest extends FailoverTestBase {
|
|||
ClientSession session2 = createSession(sf, false, false);
|
||||
session2.start();
|
||||
ClientConsumer consumer2 = session2.createConsumer(FailoverTestBase.ADDRESS);
|
||||
boolean replication = liveServer.getServer().getHAPolicy() instanceof ReplicatedPolicy;
|
||||
final boolean replication = liveServer.getServer().getHAPolicy() instanceof ReplicatedPolicy ||
|
||||
liveServer.getServer().getHAPolicy() instanceof ReplicationPrimaryPolicy;
|
||||
if (replication)
|
||||
receiveMessages(consumer2, 0, NUM_MESSAGES, true);
|
||||
assertNoMoreMessages(consumer2);
|
||||
|
@ -838,7 +846,7 @@ public class FailoverTest extends FailoverTestBase {
|
|||
public void testSimpleFailover() throws Exception {
|
||||
HAPolicy haPolicy = backupServer.getServer().getHAPolicy();
|
||||
|
||||
simpleFailover(haPolicy instanceof ReplicaPolicy, false);
|
||||
simpleFailover(haPolicy instanceof ReplicaPolicy || haPolicy instanceof ReplicationBackupPolicy, false);
|
||||
}
|
||||
|
||||
@Test(timeout = 120000)
|
||||
|
@ -926,12 +934,13 @@ public class FailoverTest extends FailoverTestBase {
|
|||
while (!backupServer.isStarted() && i++ < 100) {
|
||||
Thread.sleep(100);
|
||||
}
|
||||
liveServer.getServer().waitForActivation(5, TimeUnit.SECONDS);
|
||||
backupServer.getServer().waitForActivation(5, TimeUnit.SECONDS);
|
||||
Assert.assertTrue(backupServer.isStarted());
|
||||
|
||||
if (isReplicated) {
|
||||
FileMoveManager moveManager = new FileMoveManager(backupServer.getServer().getConfiguration().getJournalLocation(), 0);
|
||||
Assert.assertEquals(1, moveManager.getNumberOfFolders());
|
||||
// backup has not had a chance to restart as a backup and cleanup
|
||||
Wait.assertTrue(() -> moveManager.getNumberOfFolders() <= 2);
|
||||
}
|
||||
} else {
|
||||
backupServer.stop();
|
||||
|
@ -2422,6 +2431,10 @@ public class FailoverTest extends FailoverTestBase {
|
|||
// no-op
|
||||
}
|
||||
|
||||
/**
 * Hook invoked by tests in this class right before {@code liveServer.start()} is called to
 * bring the live server back.
 * <p>
 * This implementation intentionally does nothing.
 * NOTE(review): presumably meant to be overridden by subclasses that need to adjust the
 * server's activation sequence before a forced restart - confirm against the subclasses.
 *
 * @param liveServer the live server that is about to be started again
 * @throws Exception declared so that implementations may fail; this one never throws
 */
protected void decrementActivationSequenceForForceRestartOf(TestableServer liveServer) throws Exception {
|
||||
// no-op
|
||||
}
|
||||
|
||||
protected ClientSession sendAndConsume(final ClientSessionFactory sf1, final boolean createQueue) throws Exception {
|
||||
ClientSession session = createSession(sf1, false, true, true);
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover;
|
|||
import java.io.IOException;
|
||||
import java.net.ServerSocket;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
@ -36,15 +37,18 @@ import org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal
|
|||
import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal;
|
||||
import org.apache.activemq.artemis.core.config.ClusterConnectionConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.Configuration;
|
||||
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.SharedStoreMasterPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.remoting.impl.invm.InVMConnector;
|
||||
import org.apache.activemq.artemis.core.remoting.impl.invm.InVMRegistry;
|
||||
import org.apache.activemq.artemis.core.server.NodeManager;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.HAPolicy;
|
||||
import org.apache.activemq.artemis.core.server.cluster.ha.ReplicatedPolicy;
|
||||
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
|
||||
import org.apache.activemq.artemis.core.server.impl.InVMNodeManager;
|
||||
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
|
||||
import org.apache.activemq.artemis.tests.integration.cluster.util.SameProcessActiveMQServer;
|
||||
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
|
||||
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
|
||||
|
@ -79,6 +83,10 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
|
|||
|
||||
protected NodeManager nodeManager;
|
||||
|
||||
protected NodeManager backupNodeManager;
|
||||
|
||||
protected DistributedPrimitiveManagerConfiguration managerConfiguration;
|
||||
|
||||
protected boolean startBackupServer = true;
|
||||
|
||||
@Override
|
||||
|
@ -164,6 +172,10 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
|
|||
return new InVMNodeManager(false);
|
||||
}
|
||||
|
||||
/**
 * Creates the {@link NodeManager} to pair with the given configuration.
 * <p>
 * Returns a new {@link InVMNodeManager} built with {@code false} as its first argument and the
 * configuration's node manager lock location as its second.
 *
 * @param configuration the server configuration whose node manager lock location is used
 * @return a newly created {@link InVMNodeManager}
 * @throws Exception declared for overriding implementations
 */
protected NodeManager createNodeManager(Configuration configuration) throws Exception {
|
||||
return new InVMNodeManager(false, configuration.getNodeManagerLockLocation());
|
||||
}
|
||||
|
||||
protected void createConfigs() throws Exception {
|
||||
nodeManager = createNodeManager();
|
||||
TransportConfiguration liveConnector = getConnectorTransportConfiguration(true);
|
||||
|
@ -202,13 +214,14 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
|
|||
backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false);
|
||||
|
||||
setupHAPolicyConfiguration();
|
||||
nodeManager = createReplicatedBackupNodeManager(backupConfig);
|
||||
backupNodeManager = createReplicatedBackupNodeManager(backupConfig);
|
||||
|
||||
backupServer = createTestableServer(backupConfig);
|
||||
backupServer = createTestableServer(backupConfig, backupNodeManager);
|
||||
|
||||
liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true));
|
||||
|
||||
liveServer = createTestableServer(liveConfig);
|
||||
nodeManager = createNodeManager(liveConfig);
|
||||
liveServer = createTestableServer(liveConfig, nodeManager);
|
||||
|
||||
if (supportsRetention()) {
|
||||
liveServer.getServer().getConfiguration().setJournalRetentionDirectory(getJournalDir(0, false) + "_retention");
|
||||
|
@ -216,7 +229,35 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the live/backup configuration pair and the two testable servers for the
 * pluggable-quorum replication setup.
 * <p>
 * Both configurations start from {@code createDefaultInVMConfig()}. The distributed primitive
 * manager is a {@link FileBasedPrimitiveManager} whose {@code "locks-folder"} property points to
 * a fresh {@code "manager"} folder created under {@code temporaryFolder}; the same
 * {@code managerConfiguration} instance is passed for both manager arguments of
 * {@code configurePluggableQuorumReplicationPair}.
 * <p>
 * On return {@code backupConfig}, {@code liveConfig}, {@code managerConfiguration},
 * {@code backupNodeManager}, {@code backupServer}, {@code nodeManager} and {@code liveServer}
 * have all been assigned.
 * <p>
 * NOTE(review): {@code setupHAPolicyConfiguration()} as declared in this class asserts a
 * {@code ReplicaPolicyConfiguration}; presumably users of this method override it - confirm.
 *
 * @throws Exception if any of the configuration or server creation steps fails
 */
protected void createPluggableReplicatedConfigs() throws Exception {
|
||||
final TransportConfiguration liveConnector = getConnectorTransportConfiguration(true);
|
||||
final TransportConfiguration backupConnector = getConnectorTransportConfiguration(false);
|
||||
final TransportConfiguration backupAcceptor = getAcceptorTransportConfiguration(false);
|
||||
|
||||
backupConfig = createDefaultInVMConfig();
|
||||
liveConfig = createDefaultInVMConfig();
|
||||
|
||||
// file-based primitive manager rooted in a new "manager" folder of the test's temporary folder
managerConfiguration =
|
||||
new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(),
|
||||
Collections.singletonMap("locks-folder", temporaryFolder.newFolder("manager").toString()));
|
||||
|
||||
ReplicatedBackupUtils.configurePluggableQuorumReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null, managerConfiguration, managerConfiguration);
|
||||
|
||||
// point the backup at its own bindings/journal/paging/large-message directories and disable security
backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false);
|
||||
|
||||
setupHAPolicyConfiguration();
|
||||
backupNodeManager = createReplicatedBackupNodeManager(backupConfig);
|
||||
|
||||
backupServer = createTestableServer(backupConfig, backupNodeManager);
|
||||
|
||||
// replace whatever acceptors the live configuration had with the single live acceptor
liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true));
|
||||
|
||||
nodeManager = createNodeManager(liveConfig);
|
||||
liveServer = createTestableServer(liveConfig, nodeManager);
|
||||
}
|
||||
|
||||
protected void setupHAPolicyConfiguration() {
|
||||
Assert.assertTrue(backupConfig.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration);
|
||||
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(-1).setAllowFailBack(true);
|
||||
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
|
||||
}
|
||||
|
@ -233,8 +274,11 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
|
|||
configuration.getConnectorConfigurations().put(backupConnector.getName(), backupConnector);
|
||||
return;
|
||||
}
|
||||
ReplicatedPolicy haPolicy = (ReplicatedPolicy) server.getServer().getHAPolicy();
|
||||
haPolicy.setCheckForLiveServer(true);
|
||||
HAPolicy policy = server.getServer().getHAPolicy();
|
||||
if (policy instanceof ReplicatedPolicy) {
|
||||
((ReplicatedPolicy) policy).setCheckForLiveServer(true);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -253,6 +297,7 @@ public abstract class FailoverTestBase extends ActiveMQTestBase {
|
|||
|
||||
nodeManager = null;
|
||||
|
||||
backupNodeManager = null;
|
||||
try {
|
||||
ServerSocket serverSocket = new ServerSocket(61616);
|
||||
serverSocket.close();
|
||||
|
|
|
@ -19,8 +19,8 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover;
|
|||
public class GroupingFailoverReplicationTest extends GroupingFailoverTestBase {
|
||||
|
||||
@Override
|
||||
protected boolean isSharedStore() {
|
||||
return false;
|
||||
protected HAType haType() {
|
||||
return HAType.SharedNothingReplication;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover;
|
|||
public class GroupingFailoverSharedServerTest extends GroupingFailoverTestBase {
|
||||
|
||||
@Override
|
||||
protected boolean isSharedStore() {
|
||||
return true;
|
||||
protected HAType haType() {
|
||||
return HAType.SharedStore;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,22 +26,26 @@ import org.apache.activemq.artemis.api.core.client.ServerLocator;
|
|||
import org.apache.activemq.artemis.core.client.impl.TopologyMemberImpl;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.server.cluster.impl.MessageLoadBalancingType;
|
||||
import org.apache.activemq.artemis.core.server.group.impl.GroupingHandlerConfiguration;
|
||||
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
|
||||
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
|
||||
import org.apache.activemq.artemis.tests.integration.cluster.distribution.ClusterTestBase;
|
||||
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
|
||||
import org.apache.activemq.artemis.utils.Wait;
|
||||
import org.junit.Test;
|
||||
|
||||
public abstract class GroupingFailoverTestBase extends ClusterTestBase {
|
||||
|
||||
@Test
|
||||
public void testGroupingLocalHandlerFails() throws Exception {
|
||||
setupBackupServer(2, 0, isFileStorage(), isSharedStore(), isNetty());
|
||||
setupBackupServer(2, 0, isFileStorage(), haType(), isNetty());
|
||||
|
||||
setupLiveServer(0, isFileStorage(), isSharedStore(), isNetty(), false);
|
||||
setupLiveServer(0, isFileStorage(), haType(), isNetty(), false);
|
||||
|
||||
setupLiveServer(1, isFileStorage(), isSharedStore(), isNetty(), false);
|
||||
setupLiveServer(1, isFileStorage(), haType(), isNetty(), false);
|
||||
|
||||
setupClusterConnection("cluster0", "queues", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 0, 1);
|
||||
|
||||
|
@ -54,10 +58,18 @@ public abstract class GroupingFailoverTestBase extends ClusterTestBase {
|
|||
setUpGroupHandler(GroupingHandlerConfiguration.TYPE.REMOTE, 1);
|
||||
|
||||
setUpGroupHandler(GroupingHandlerConfiguration.TYPE.LOCAL, 2);
|
||||
if (!isSharedStore()) {
|
||||
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
|
||||
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
|
||||
((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
|
||||
switch (haType()) {
|
||||
|
||||
case SharedNothingReplication:
|
||||
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
|
||||
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
|
||||
((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
|
||||
break;
|
||||
case PluggableQuorumReplication:
|
||||
((ReplicationPrimaryPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
|
||||
((ReplicationPrimaryPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
|
||||
((ReplicationBackupPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
|
||||
break;
|
||||
}
|
||||
|
||||
startServers(0, 1, 2);
|
||||
|
@ -129,11 +141,11 @@ public abstract class GroupingFailoverTestBase extends ClusterTestBase {
|
|||
|
||||
@Test
|
||||
public void testGroupingLocalHandlerFailsMultipleGroups() throws Exception {
|
||||
setupBackupServer(2, 0, isFileStorage(), isSharedStore(), isNetty());
|
||||
setupBackupServer(2, 0, isFileStorage(), haType(), isNetty());
|
||||
|
||||
setupLiveServer(0, isFileStorage(), isSharedStore(), isNetty(), false);
|
||||
setupLiveServer(0, isFileStorage(), haType(), isNetty(), false);
|
||||
|
||||
setupLiveServer(1, isFileStorage(), isSharedStore(), isNetty(), false);
|
||||
setupLiveServer(1, isFileStorage(), haType(), isNetty(), false);
|
||||
|
||||
setupClusterConnection("cluster0", "queues", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 0, 1);
|
||||
|
||||
|
@ -147,10 +159,18 @@ public abstract class GroupingFailoverTestBase extends ClusterTestBase {
|
|||
|
||||
setUpGroupHandler(GroupingHandlerConfiguration.TYPE.LOCAL, 2);
|
||||
|
||||
if (!isSharedStore()) {
|
||||
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
|
||||
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
|
||||
((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
|
||||
switch (haType()) {
|
||||
|
||||
case SharedNothingReplication:
|
||||
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
|
||||
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
|
||||
((ReplicaPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
|
||||
break;
|
||||
case PluggableQuorumReplication:
|
||||
((ReplicationPrimaryPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
|
||||
((ReplicationPrimaryPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setGroupName("group2");
|
||||
((ReplicationBackupPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setGroupName("group1");
|
||||
break;
|
||||
}
|
||||
|
||||
startServers(0, 1, 2);
|
||||
|
@ -187,9 +207,17 @@ public abstract class GroupingFailoverTestBase extends ClusterTestBase {
|
|||
|
||||
verifyReceiveAllWithGroupIDRoundRobin(0, 30, 0, 1);
|
||||
|
||||
if (!isSharedStore()) {
|
||||
SharedNothingBackupActivation backupActivation = (SharedNothingBackupActivation) servers[2].getActivation();
|
||||
assertTrue(backupActivation.waitForBackupSync(10, TimeUnit.SECONDS));
|
||||
switch (haType()) {
|
||||
case SharedNothingReplication: {
|
||||
SharedNothingBackupActivation backupActivation = (SharedNothingBackupActivation) servers[2].getActivation();
|
||||
assertTrue(backupActivation.waitForBackupSync(10, TimeUnit.SECONDS));
|
||||
}
|
||||
break;
|
||||
case PluggableQuorumReplication: {
|
||||
ReplicationBackupActivation backupActivation = (ReplicationBackupActivation) servers[2].getActivation();
|
||||
Wait.assertTrue(backupActivation::isReplicaSync, TimeUnit.SECONDS.toMillis(10));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
closeSessionFactory(0);
|
||||
|
|
|
@ -49,14 +49,14 @@ public class LiveVoteOnBackupFailureClusterTest extends ClusterWithBackupFailove
|
|||
@Override
|
||||
protected void setupServers() throws Exception {
|
||||
// The backups
|
||||
setupBackupServer(3, 0, isFileStorage(), isSharedStorage(), isNetty());
|
||||
setupBackupServer(4, 1, isFileStorage(), isSharedStorage(), isNetty());
|
||||
setupBackupServer(5, 2, isFileStorage(), isSharedStorage(), isNetty());
|
||||
setupBackupServer(3, 0, isFileStorage(), haType(), isNetty());
|
||||
setupBackupServer(4, 1, isFileStorage(), haType(), isNetty());
|
||||
setupBackupServer(5, 2, isFileStorage(), haType(), isNetty());
|
||||
|
||||
// The lives
|
||||
setupLiveServer(0, isFileStorage(), isSharedStorage(), isNetty(), false);
|
||||
setupLiveServer(1, isFileStorage(), isSharedStorage(), isNetty(), false);
|
||||
setupLiveServer(2, isFileStorage(), isSharedStorage(), isNetty(), false);
|
||||
setupLiveServer(0, isFileStorage(), haType(), isNetty(), false);
|
||||
setupLiveServer(1, isFileStorage(), haType(), isNetty(), false);
|
||||
setupLiveServer(2, isFileStorage(), haType(), isNetty(), false);
|
||||
|
||||
//we need to know who is connected to who
|
||||
((ReplicatedPolicyConfiguration) servers[0].getConfiguration().getHAPolicyConfiguration()).setGroupName("group0");
|
||||
|
@ -71,9 +71,9 @@ public class LiveVoteOnBackupFailureClusterTest extends ClusterWithBackupFailove
|
|||
((ReplicatedPolicyConfiguration) servers[1].getConfiguration().getHAPolicyConfiguration()).setVoteOnReplicationFailure(true);
|
||||
((ReplicatedPolicyConfiguration) servers[2].getConfiguration().getHAPolicyConfiguration()).setVoteOnReplicationFailure(true);
|
||||
}
|
||||
|
||||
protected boolean isSharedStorage() {
|
||||
return false;
|
||||
@Override
|
||||
protected HAType haType() {
|
||||
return HAType.SharedNothingReplication;
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -16,7 +16,9 @@
|
|||
*/
|
||||
package org.apache.activemq.artemis.tests.integration.cluster.failover;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.activemq.artemis.api.core.SimpleString;
|
||||
|
@ -27,14 +29,19 @@ import org.apache.activemq.artemis.api.core.client.ClientSessionFactory;
|
|||
import org.apache.activemq.artemis.core.client.impl.ServerLocatorInternal;
|
||||
import org.apache.activemq.artemis.core.config.Configuration;
|
||||
import org.apache.activemq.artemis.core.config.HAPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.DistributedPrimitiveManagerConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationPrimaryPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.SharedStoreMasterPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServer;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServerLogger;
|
||||
import org.apache.activemq.artemis.core.server.NodeManager;
|
||||
import org.apache.activemq.artemis.core.server.Queue;
|
||||
import org.apache.activemq.artemis.quorum.file.FileBasedPrimitiveManager;
|
||||
import org.apache.activemq.artemis.tests.integration.cluster.distribution.ClusterTestBase;
|
||||
import org.apache.activemq.artemis.tests.util.Wait;
|
||||
import org.apache.activemq.artemis.tests.integration.cluster.util.SameProcessActiveMQServer;
|
||||
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
|
||||
|
@ -43,6 +50,21 @@ import org.apache.activemq.artemis.tests.util.TransportConfigurationUtils;
|
|||
import org.junit.Before;
|
||||
|
||||
public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
|
||||
|
||||
private DistributedPrimitiveManagerConfiguration pluggableQuorumConfiguration = null;
|
||||
|
||||
private DistributedPrimitiveManagerConfiguration getOrCreatePluggableQuorumConfiguration() {
|
||||
if (pluggableQuorumConfiguration != null) {
|
||||
return pluggableQuorumConfiguration;
|
||||
}
|
||||
try {
|
||||
pluggableQuorumConfiguration = new DistributedPrimitiveManagerConfiguration(FileBasedPrimitiveManager.class.getName(), Collections.singletonMap("locks-folder", temporaryFolder.newFolder("manager").toString()));
|
||||
} catch (IOException ioException) {
|
||||
return null;
|
||||
}
|
||||
return pluggableQuorumConfiguration;
|
||||
}
|
||||
|
||||
// Constants -----------------------------------------------------
|
||||
|
||||
// TODO: find a better solution for this
|
||||
|
@ -67,7 +89,15 @@ public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
|
|||
|
||||
public abstract boolean isNetty();
|
||||
|
||||
public abstract boolean isSharedStore();
|
||||
/**
 * The high-availability flavour a concrete test runs with; {@code haType()} returns one of
 * these and the server setup code in this class switches on it to pick the live and backup
 * HA policy configurations.
 */
public enum HAType {
|
||||
SharedStore, SharedNothingReplication, PluggableQuorumReplication
|
||||
}
|
||||
|
||||
public abstract HAType haType();
|
||||
|
||||
protected final boolean isSharedStore() {
|
||||
return ClusterTestBase.HAType.SharedStore.equals(haType());
|
||||
}
|
||||
|
||||
public abstract String getNodeGroupName();
|
||||
|
||||
|
@ -82,14 +112,22 @@ public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
|
|||
|
||||
for (int i = 0; i < getLiveServerCount(); i++) {
|
||||
HAPolicyConfiguration haPolicyConfiguration = null;
|
||||
switch (haType()) {
|
||||
|
||||
if (isSharedStore()) {
|
||||
haPolicyConfiguration = new SharedStoreMasterPolicyConfiguration();
|
||||
} else {
|
||||
haPolicyConfiguration = new ReplicatedPolicyConfiguration();
|
||||
if (getNodeGroupName() != null) {
|
||||
((ReplicatedPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i);
|
||||
}
|
||||
case SharedStore:
|
||||
haPolicyConfiguration = new SharedStoreMasterPolicyConfiguration();
|
||||
break;
|
||||
case SharedNothingReplication:
|
||||
haPolicyConfiguration = new ReplicatedPolicyConfiguration();
|
||||
if (getNodeGroupName() != null) {
|
||||
((ReplicatedPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i);
|
||||
}
|
||||
break;
|
||||
case PluggableQuorumReplication:
|
||||
haPolicyConfiguration = ReplicationPrimaryPolicyConfiguration.withDefault()
|
||||
.setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration())
|
||||
.setGroupName(getNodeGroupName() != null ? (getNodeGroupName() + "-" + i) : null);
|
||||
break;
|
||||
}
|
||||
|
||||
Configuration configuration = createDefaultConfig(isNetty()).clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true, i)).setHAPolicyConfiguration(haPolicyConfiguration);
|
||||
|
@ -126,13 +164,22 @@ public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
|
|||
for (int i = 0; i < getBackupServerCount(); i++) {
|
||||
HAPolicyConfiguration haPolicyConfiguration = null;
|
||||
|
||||
if (isSharedStore()) {
|
||||
haPolicyConfiguration = new SharedStoreSlavePolicyConfiguration();
|
||||
} else {
|
||||
haPolicyConfiguration = new ReplicaPolicyConfiguration();
|
||||
if (getNodeGroupName() != null) {
|
||||
((ReplicaPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i);
|
||||
}
|
||||
switch (haType()) {
|
||||
|
||||
case SharedStore:
|
||||
haPolicyConfiguration = new SharedStoreSlavePolicyConfiguration();
|
||||
break;
|
||||
case SharedNothingReplication:
|
||||
haPolicyConfiguration = new ReplicaPolicyConfiguration();
|
||||
if (getNodeGroupName() != null) {
|
||||
((ReplicaPolicyConfiguration) haPolicyConfiguration).setGroupName(getNodeGroupName() + "-" + i);
|
||||
}
|
||||
break;
|
||||
case PluggableQuorumReplication:
|
||||
haPolicyConfiguration = ReplicationBackupPolicyConfiguration.withDefault()
|
||||
.setDistributedManagerConfiguration(getOrCreatePluggableQuorumConfiguration())
|
||||
.setGroupName(getNodeGroupName() != null ? (getNodeGroupName() + "-" + i) : null);
|
||||
break;
|
||||
}
|
||||
|
||||
Configuration configuration = createDefaultConfig(isNetty()).clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(false, i)).setHAPolicyConfiguration(haPolicyConfiguration);
|
||||
|
@ -224,12 +271,14 @@ public abstract class MultipleServerFailoverTestBase extends ActiveMQTestBase {
|
|||
return addClientSession(sf.createSession(xa, autoCommitSends, autoCommitAcks));
|
||||
}
|
||||
|
||||
protected void waitForDistribution(SimpleString address, ActiveMQServer server, int messageCount) throws Exception {
|
||||
protected boolean waitForDistribution(SimpleString address, ActiveMQServer server, int messageCount) throws Exception {
|
||||
ActiveMQServerLogger.LOGGER.debug("waiting for distribution of messages on server " + server);
|
||||
|
||||
Queue q = (Queue) server.getPostOffice().getBinding(address).getBindable();
|
||||
|
||||
Wait.waitFor(() -> getMessageCount(q) >= messageCount);
|
||||
return Wait.waitFor(() -> {
|
||||
return getMessageCount(q) >= messageCount;
|
||||
});
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -103,7 +103,7 @@ public class NettyReplicationStopTest extends FailoverTestBase {
|
|||
|
||||
final int numMessages = 10;
|
||||
|
||||
ReplicationEndpoint endpoint = backupServer.getServer().getReplicationEndpoint();
|
||||
ReplicationEndpoint endpoint = getReplicationEndpoint(backupServer.getServer());
|
||||
|
||||
endpoint.pause();
|
||||
|
||||
|
|
|
@ -124,14 +124,14 @@ public class NetworkIsolationTest extends FailoverTestBase {
|
|||
|
||||
liveServer.start();
|
||||
|
||||
for (int i = 0; i < 1000 && backupServer.getServer().getReplicationEndpoint() != null && !backupServer.getServer().getReplicationEndpoint().isStarted(); i++) {
|
||||
for (int i = 0; i < 1000 && getReplicationEndpoint(backupServer.getServer()) != null && !getReplicationEndpoint(backupServer.getServer()).isStarted(); i++) {
|
||||
Thread.sleep(10);
|
||||
}
|
||||
|
||||
backupServer.getServer().getNetworkHealthCheck().clearAddresses();
|
||||
|
||||
// This will make sure the backup got synchronized after the network was activated again
|
||||
Wait.assertTrue(() -> backupServer.getServer().getReplicationEndpoint().isStarted());
|
||||
Assert.assertTrue(getReplicationEndpoint(backupServer.getServer()).isStarted());
|
||||
} finally {
|
||||
AssertionLoggerHandler.stopCapture();
|
||||
}
|
||||
|
|
|
@ -17,12 +17,10 @@
|
|||
|
||||
package org.apache.activemq.artemis.tests.integration.cluster.failover;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.activemq.artemis.api.core.ActiveMQException;
|
||||
import org.apache.activemq.artemis.api.core.Interceptor;
|
||||
import org.apache.activemq.artemis.api.core.QueueConfiguration;
|
||||
import org.apache.activemq.artemis.api.core.SimpleString;
|
||||
import org.apache.activemq.artemis.api.core.TransportConfiguration;
|
||||
|
@ -34,16 +32,18 @@ import org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal
|
|||
import org.apache.activemq.artemis.core.config.Configuration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicatedPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.SharedStoreSlavePolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.protocol.core.Packet;
|
||||
import org.apache.activemq.artemis.core.protocol.core.impl.PacketImpl;
|
||||
import org.apache.activemq.artemis.core.replication.ReplicationEndpoint;
|
||||
import org.apache.activemq.artemis.core.server.NodeManager;
|
||||
import org.apache.activemq.artemis.core.server.impl.Activation;
|
||||
import org.apache.activemq.artemis.core.server.impl.ActiveMQServerImpl;
|
||||
import org.apache.activemq.artemis.core.server.impl.InVMNodeManager;
|
||||
import org.apache.activemq.artemis.core.server.impl.ReplicationBackupActivation;
|
||||
import org.apache.activemq.artemis.core.server.impl.SharedNothingBackupActivation;
|
||||
import org.apache.activemq.artemis.tests.util.Wait;
|
||||
import org.apache.activemq.artemis.logs.AssertionLoggerHandler;
|
||||
import org.apache.activemq.artemis.spi.core.protocol.RemotingConnection;
|
||||
import org.apache.activemq.artemis.tests.integration.cluster.util.SameProcessActiveMQServer;
|
||||
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
|
||||
import org.apache.activemq.artemis.tests.util.ActiveMQTestBase;
|
||||
|
@ -77,7 +77,9 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
|
|||
}
|
||||
|
||||
protected TestableServer createTestableServer(Configuration config, NodeManager nodeManager) throws Exception {
|
||||
boolean isBackup = config.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration || config.getHAPolicyConfiguration() instanceof SharedStoreSlavePolicyConfiguration;
|
||||
boolean isBackup = config.getHAPolicyConfiguration() instanceof ReplicationBackupPolicyConfiguration ||
|
||||
config.getHAPolicyConfiguration() instanceof ReplicaPolicyConfiguration ||
|
||||
config.getHAPolicyConfiguration() instanceof SharedStoreSlavePolicyConfiguration;
|
||||
return new SameProcessActiveMQServer(createInVMFailoverServer(true, config, nodeManager, isBackup ? 2 : 1));
|
||||
}
|
||||
|
||||
|
@ -119,6 +121,19 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
|
|||
liveServer.crash(true, true, sessions);
|
||||
}
|
||||
|
||||
protected void configureReplicationPair(Configuration backupConfig,
|
||||
Configuration liveConfig,
|
||||
TransportConfiguration backupConnector,
|
||||
TransportConfiguration backupAcceptor,
|
||||
TransportConfiguration liveConnector) throws IOException {
|
||||
ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null);
|
||||
((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000);
|
||||
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000);
|
||||
((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
|
||||
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
|
||||
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
|
||||
}
|
||||
|
||||
@Test//(timeout = 120000)
|
||||
public void testFailbackTimeout() throws Exception {
|
||||
AssertionLoggerHandler.startCapture();
|
||||
|
@ -134,29 +149,22 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
|
|||
Configuration backupConfig = createDefaultInVMConfig();
|
||||
Configuration liveConfig = createDefaultInVMConfig();
|
||||
|
||||
ReplicatedBackupUtils.configureReplicationPair(backupConfig, backupConnector, backupAcceptor, liveConfig, liveConnector, null);
|
||||
((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000);
|
||||
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setInitialReplicationSyncTimeout(1000);
|
||||
configureReplicationPair(backupConfig, liveConfig, backupConnector, backupAcceptor, liveConnector);
|
||||
|
||||
backupConfig.setBindingsDirectory(getBindingsDir(0, true)).setJournalDirectory(getJournalDir(0, true)).
|
||||
setPagingDirectory(getPageDir(0, true)).setLargeMessagesDirectory(getLargeMessagesDir(0, true)).setSecurityEnabled(false);
|
||||
liveConfig.setBindingsDirectory(getBindingsDir(0, false)).setJournalDirectory(getJournalDir(0, false)).
|
||||
setPagingDirectory(getPageDir(0, false)).setLargeMessagesDirectory(getLargeMessagesDir(0, false)).setSecurityEnabled(false);
|
||||
|
||||
((ReplicatedPolicyConfiguration) liveConfig.getHAPolicyConfiguration()).setCheckForLiveServer(true);
|
||||
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setMaxSavedReplicatedJournalsSize(2).setAllowFailBack(true);
|
||||
((ReplicaPolicyConfiguration) backupConfig.getHAPolicyConfiguration()).setRestartBackup(false);
|
||||
NodeManager replicatedBackupNodeManager = createReplicatedBackupNodeManager(backupConfig);
|
||||
|
||||
NodeManager nodeManager = createReplicatedBackupNodeManager(backupConfig);
|
||||
|
||||
backupServer = createTestableServer(backupConfig, nodeManager);
|
||||
backupServer = createTestableServer(backupConfig, replicatedBackupNodeManager);
|
||||
|
||||
liveConfig.clearAcceptorConfigurations().addAcceptorConfiguration(getAcceptorTransportConfiguration(true));
|
||||
|
||||
NodeManager nodeManager = createReplicatedBackupNodeManager(liveConfig);
|
||||
liveServer = createTestableServer(liveConfig, nodeManager);
|
||||
|
||||
AtomicBoolean ignoreIntercept = new AtomicBoolean(false);
|
||||
|
||||
final TestableServer theBackup = backupServer;
|
||||
|
||||
liveServer.start();
|
||||
|
@ -174,23 +182,30 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
|
|||
|
||||
Wait.assertTrue(backupServer.getServer()::isActive);
|
||||
|
||||
ignoreIntercept.set(true);
|
||||
|
||||
((ActiveMQServerImpl) backupServer.getServer()).setAfterActivationCreated(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
//theBackup.getServer().getActivation()
|
||||
|
||||
SharedNothingBackupActivation activation = (SharedNothingBackupActivation) theBackup.getServer().getActivation();
|
||||
activation.getReplicationEndpoint().addOutgoingInterceptorForReplication(new Interceptor() {
|
||||
@Override
|
||||
public boolean intercept(Packet packet, RemotingConnection connection) throws ActiveMQException {
|
||||
if (ignoreIntercept.get() && packet.getType() == PacketImpl.REPLICATION_RESPONSE_V2) {
|
||||
final Activation backupActivation = theBackup.getServer().getActivation();
|
||||
if (backupActivation instanceof SharedNothingBackupActivation) {
|
||||
SharedNothingBackupActivation activation = (SharedNothingBackupActivation) backupActivation;
|
||||
ReplicationEndpoint repEnd = activation.getReplicationEndpoint();
|
||||
repEnd.addOutgoingInterceptorForReplication((packet, connection) -> {
|
||||
if (packet.getType() == PacketImpl.REPLICATION_RESPONSE_V2) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
});
|
||||
});
|
||||
} else if (backupActivation instanceof ReplicationBackupActivation) {
|
||||
ReplicationBackupActivation activation = (ReplicationBackupActivation) backupActivation;
|
||||
activation.spyReplicationEndpointCreation(replicationEndpoint -> {
|
||||
replicationEndpoint.addOutgoingInterceptorForReplication((packet, connection) -> {
|
||||
if (packet.getType() == PacketImpl.REPLICATION_RESPONSE_V2) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
|
@ -198,7 +213,9 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
|
|||
|
||||
Assert.assertTrue(Wait.waitFor(() -> AssertionLoggerHandler.findText("AMQ229114")));
|
||||
|
||||
Wait.assertFalse(liveServer.getServer()::isStarted);
|
||||
if (expectLiveSuicide()) {
|
||||
Wait.assertFalse(liveServer.getServer()::isStarted);
|
||||
}
|
||||
|
||||
} finally {
|
||||
if (sf != null) {
|
||||
|
@ -218,4 +235,8 @@ public class ReplicaTimeoutTest extends ActiveMQTestBase {
|
|||
}
|
||||
}
|
||||
|
||||
protected boolean expectLiveSuicide() {
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -178,9 +178,9 @@ public class ReplicatedDistributionTest extends ClusterTestBase {
|
|||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
setupLiveServer(1, true, isSharedStore(), true, false);
|
||||
setupLiveServer(3, true, isSharedStore(), true, false);
|
||||
setupBackupServer(2, 3, true, isSharedStore(), true);
|
||||
setupLiveServer(1, true, haType(), true, false);
|
||||
setupLiveServer(3, true, haType(), true, false);
|
||||
setupBackupServer(2, 3, true, haType(), true);
|
||||
|
||||
final String address = ReplicatedDistributionTest.ADDRESS.toString();
|
||||
// notice the abuse of the method call, '3' is not a backup for '1'
|
||||
|
@ -210,7 +210,7 @@ public class ReplicatedDistributionTest extends ClusterTestBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected boolean isSharedStore() {
|
||||
return false;
|
||||
protected HAType haType() {
|
||||
return HAType.SharedNothingReplication;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.activemq.artemis.api.core.client.ClientSessionFactory;
|
|||
import org.apache.activemq.artemis.api.core.client.FailoverEventType;
|
||||
import org.apache.activemq.artemis.api.core.client.ServerLocator;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicaPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.config.ha.ReplicationBackupPolicyConfiguration;
|
||||
import org.apache.activemq.artemis.core.server.ActiveMQServer;
|
||||
import org.apache.activemq.artemis.tests.util.Wait;
|
||||
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
|
||||
|
@ -51,8 +52,16 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated
|
|||
@Override
|
||||
@Test
|
||||
public void testStartLiveFirst() throws Exception {
|
||||
((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
|
||||
((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
|
||||
switch (haType()) {
|
||||
case SharedNothingReplication:
|
||||
((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
|
||||
((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
|
||||
break;
|
||||
case PluggableQuorumReplication:
|
||||
((ReplicationBackupPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
|
||||
((ReplicationBackupPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
|
||||
break;
|
||||
}
|
||||
|
||||
startServers(liveServers);
|
||||
backupServers.get(0).start();
|
||||
|
@ -66,7 +75,10 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated
|
|||
|
||||
sendCrashReceive();
|
||||
Wait.assertTrue(backupServers.get(0)::isActive, 5000, 10);
|
||||
Wait.assertTrue(backupServers.get(1)::isActive, 5000, 10);
|
||||
waitForTopology(backupServers.get(0).getServer(), liveServers.size(), 2);
|
||||
waitForTopology(backupServers.get(1).getServer(), liveServers.size(), 2);
|
||||
|
||||
sendCrashBackupReceive();
|
||||
}
|
||||
|
||||
|
@ -85,8 +97,17 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated
|
|||
@Override
|
||||
@Test
|
||||
public void testStartBackupFirst() throws Exception {
|
||||
((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
|
||||
((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
|
||||
switch (haType()) {
|
||||
case SharedNothingReplication:
|
||||
((ReplicaPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
|
||||
((ReplicaPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
|
||||
break;
|
||||
case PluggableQuorumReplication:
|
||||
((ReplicationBackupPolicyConfiguration) backupServers.get(2).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-0");
|
||||
((ReplicationBackupPolicyConfiguration) backupServers.get(3).getServer().getConfiguration().getHAPolicyConfiguration()).setGroupName(getNodeGroupName() + "-1");
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
startServers(backupServers);
|
||||
startServers(liveServers);
|
||||
|
@ -97,6 +118,14 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated
|
|||
}
|
||||
|
||||
protected void sendCrashBackupReceive() throws Exception {
|
||||
|
||||
//make sure bindings are ready before sending messages b/c we verify strict load balancing in waitForDistribution
|
||||
this.waitForBindings( backupServers.get(0).getServer(), ADDRESS.toString(), false, 1, 0, 2000);
|
||||
this.waitForBindings( backupServers.get(0).getServer(), ADDRESS.toString(), false, 1, 0, 2000);
|
||||
|
||||
this.waitForBindings( backupServers.get(1).getServer(), ADDRESS.toString(), false, 1, 0, 2000);
|
||||
this.waitForBindings( backupServers.get(1).getServer(), ADDRESS.toString(), false, 1, 0, 2000);
|
||||
|
||||
ServerLocator locator0 = getBackupServerLocator(0);
|
||||
ServerLocator locator1 = getBackupServerLocator(1);
|
||||
|
||||
|
@ -120,8 +149,8 @@ public class ReplicatedMultipleServerFailoverExtraBackupsTest extends Replicated
|
|||
|
||||
producer.close();
|
||||
|
||||
waitForDistribution(ADDRESS, backupServers.get(0).getServer(), 100);
|
||||
waitForDistribution(ADDRESS, backupServers.get(1).getServer(), 100);
|
||||
assertTrue(waitForDistribution(ADDRESS, backupServers.get(0).getServer(), 100));
|
||||
assertTrue(waitForDistribution(ADDRESS, backupServers.get(1).getServer(), 100));
|
||||
|
||||
List<TestableServer> toCrash = new ArrayList<>();
|
||||
for (TestableServer backupServer : backupServers) {
|
||||
|
|
|
@ -16,6 +16,9 @@
|
|||
*/
|
||||
package org.apache.activemq.artemis.tests.integration.cluster.failover;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.activemq.artemis.api.core.QueueConfiguration;
|
||||
import org.apache.activemq.artemis.api.core.client.ClientConsumer;
|
||||
import org.apache.activemq.artemis.api.core.client.ClientMessage;
|
||||
|
@ -25,9 +28,20 @@ import org.apache.activemq.artemis.api.core.client.ClientSessionFactory;
|
|||
import org.apache.activemq.artemis.api.core.client.ServerLocator;
|
||||
import org.apache.activemq.artemis.tests.integration.cluster.util.TestableServer;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.Parameterized;
|
||||
|
||||
@RunWith(Parameterized.class)
|
||||
public class ReplicatedMultipleServerFailoverTest extends MultipleServerFailoverTestBase {
|
||||
|
||||
@Parameterized.Parameter
|
||||
public HAType haType;
|
||||
|
||||
@Parameterized.Parameters(name = "ha={0}")
|
||||
public static Collection<Object[]> getParams() {
|
||||
return Arrays.asList(new Object[][]{{HAType.SharedNothingReplication}, {HAType.PluggableQuorumReplication}});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStartLiveFirst() throws Exception {
|
||||
for (TestableServer liveServer : liveServers) {
|
||||
|
@ -140,8 +154,8 @@ public class ReplicatedMultipleServerFailoverTest extends MultipleServerFailover
|
|||
}
|
||||
|
||||
@Override
|
||||
public boolean isSharedStore() {
|
||||
return false;
|
||||
public HAType haType() {
|
||||
return haType;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.activemq.artemis.tests.integration.cluster.failover;
|
|||
public class SharedStoreDistributionTest extends ReplicatedDistributionTest {
|
||||
|
||||
@Override
|
||||
protected boolean isSharedStore() {
|
||||
return true;
|
||||
protected HAType haType() {
|
||||
return HAType.SharedStore;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -41,8 +41,8 @@ public class SharedStoreDontWaitForActivationTest extends ClusterTestBase {
|
|||
|
||||
// 1. configure 0 as backup of one to share the same node manager and file
|
||||
// storage locations
|
||||
setupBackupServer(0, 1, isFileStorage(), true, isNetty());
|
||||
setupLiveServer(1, isFileStorage(), true, isNetty(), false);
|
||||
setupBackupServer(0, 1, isFileStorage(), HAType.SharedStore, isNetty());
|
||||
setupLiveServer(1, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
|
||||
// now reconfigure the HA policy for both servers to master with automatic
|
||||
// failover and wait-for-activation disabled.
|
||||
|
|
|
@ -40,8 +40,8 @@ public class SharedStoreMetricsLeakTest extends ClusterTestBase {
|
|||
}
|
||||
|
||||
private void setupServers() throws Exception {
|
||||
setupLiveServer(0, isFileStorage(), true, isNetty(), false);
|
||||
setupBackupServer(1, 0, isFileStorage(), true, isNetty());
|
||||
setupLiveServer(0, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
setupBackupServer(1, 0, isFileStorage(), HAType.SharedStore, isNetty());
|
||||
|
||||
getServer(0).getConfiguration().setHAPolicyConfiguration(new SharedStoreMasterPolicyConfiguration().setFailoverOnServerShutdown(true));
|
||||
getServer(0).getConfiguration().setMetricsConfiguration(new MetricsConfiguration().setJvmThread(false).setJvmGc(false).setJvmMemory(false).setPlugin(new SimpleMetricsPlugin().init(null)));
|
||||
|
|
|
@ -41,9 +41,9 @@ public class SharedStoreScaleDownBackupTest extends ClusterTestBase {
|
|||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
setupLiveServer(0, isFileStorage(), true, isNetty(), false);
|
||||
setupLiveServer(1, isFileStorage(), true, isNetty(), false);
|
||||
setupBackupServer(2, 0, isFileStorage(), true, isNetty());
|
||||
setupLiveServer(0, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
setupLiveServer(1, isFileStorage(), HAType.SharedStore, isNetty(), false);
|
||||
setupBackupServer(2, 0, isFileStorage(), HAType.SharedStore, isNetty());
|
||||
|
||||
setupClusterConnection("cluster0", "testAddress", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 0, 1);
|
||||
setupClusterConnection("cluster1", "testAddress", MessageLoadBalancingType.ON_DEMAND, 1, isNetty(), 1, 0);
|
||||
|
|
|
@ -42,13 +42,13 @@ public class StaticClusterWithBackupFailoverTest extends ClusterWithBackupFailov
|
|||
@Override
|
||||
protected void setupServers() throws Exception {
|
||||
// The backups
|
||||
setupBackupServer(3, 0, isFileStorage(), isSharedStorage(), isNetty());
|
||||
setupBackupServer(4, 1, isFileStorage(), isSharedStorage(), isNetty());
|
||||
setupBackupServer(5, 2, isFileStorage(), isSharedStorage(), isNetty());
|
||||
setupBackupServer(3, 0, isFileStorage(), haType(), isNetty());
|
||||
setupBackupServer(4, 1, isFileStorage(), haType(), isNetty());
|
||||
setupBackupServer(5, 2, isFileStorage(), haType(), isNetty());
|
||||
|
||||
// The lives
|
||||
setupLiveServer(0, isFileStorage(), isSharedStorage(), isNetty(), false);
|
||||
setupLiveServer(1, isFileStorage(), isSharedStorage(), isNetty(), false);
|
||||
setupLiveServer(2, isFileStorage(), isSharedStorage(), isNetty(), false);
|
||||
setupLiveServer(0, isFileStorage(), haType(), isNetty(), false);
|
||||
setupLiveServer(1, isFileStorage(), haType(), isNetty(), false);
|
||||
setupLiveServer(2, isFileStorage(), haType(), isNetty(), false);
|
||||
}
|
||||
}
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue