mirror of https://github.com/apache/activemq.git
Fix for https://issues.apache.org/jira/browse/AMQ-5082 ActiveMQ replicatedLevelDB cluster breaks, all nodes stop listening.
Many thanks to Jim Robinson (jim.robinson@gmail.com) for the patch!
This commit is contained in:
parent
351d4b9dea
commit
a39e51e051
|
@ -72,7 +72,9 @@ object ZooKeeperGroup {
|
|||
*/
|
||||
class ZooKeeperGroup(val zk: ZKClient, val root: String) extends LifecycleListener with ChangeListenerSupport {
|
||||
|
||||
val tree = new ZooKeeperTreeTracker[Array[Byte]](zk, new ZKByteArrayDataReader, root, 1)
|
||||
var tree = new ZooKeeperTreeTracker[Array[Byte]](zk, new ZKByteArrayDataReader, root, 1)
|
||||
var rebuildTree = false
|
||||
|
||||
val joins = HashMap[String, Int]()
|
||||
|
||||
var members = new LinkedHashMap[String, Array[Byte]]
|
||||
|
@ -82,12 +84,13 @@ class ZooKeeperGroup(val zk: ZKClient, val root: String) extends LifecycleListen
|
|||
zk.registerListener(this)
|
||||
|
||||
create(root)
|
||||
tree.track(new NodeEventsListener[Array[Byte]]() {
|
||||
var treeEventHandler = new NodeEventsListener[Array[Byte]]() {
|
||||
def onEvents(events: Collection[NodeEvent[Array[Byte]]]): Unit = {
|
||||
if( !closed )
|
||||
fire_cluster_change
|
||||
fire_cluster_change;
|
||||
}
|
||||
})
|
||||
}
|
||||
tree.track(treeEventHandler)
|
||||
fire_cluster_change
|
||||
|
||||
@volatile
|
||||
|
@ -110,7 +113,21 @@ class ZooKeeperGroup(val zk: ZKClient, val root: String) extends LifecycleListen
|
|||
}
|
||||
|
||||
def connected = zk.isConnected
|
||||
def onConnected() = fireConnected()
|
||||
def onConnected() = {
|
||||
this.synchronized {
|
||||
// underlying ZooKeeperTreeTracker isn't rebuilding itself after
|
||||
// the loss of the session, so we need to destroy/rebuild it on
|
||||
// reconnect.
|
||||
if (rebuildTree) {
|
||||
tree.destroy
|
||||
tree = new ZooKeeperTreeTracker[Array[Byte]](zk, new ZKByteArrayDataReader, root, 1)
|
||||
tree.track(treeEventHandler)
|
||||
} else {
|
||||
rebuildTree = true
|
||||
}
|
||||
}
|
||||
fireConnected()
|
||||
}
|
||||
def onDisconnected() = {
|
||||
this.members = new LinkedHashMap()
|
||||
fireDisconnected()
|
||||
|
@ -187,5 +204,4 @@ class ZooKeeperGroup(val zk: ZKClient, val root: String) extends LifecycleListen
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -182,6 +182,67 @@ public class ElectingLevelDBStoreTest extends ZooKeeperTestSupport {
|
|||
});
|
||||
}
|
||||
|
||||
/*
|
||||
* testAMQ5082 tests the behavior of an ElectingLevelDBStore
|
||||
* pool when ZooKeeper I/O timeouts occur. See issue AMQ-5082.
|
||||
*/
|
||||
@Test(timeout = 1000 * 60 * 5)
|
||||
public void testAMQ5082() throws Throwable {
|
||||
final ArrayList<ElectingLevelDBStore> stores = new ArrayList<ElectingLevelDBStore>();
|
||||
|
||||
LOG.info("Launching 3 stores");
|
||||
for (String dir : new String[]{"leveldb-node1", "leveldb-node2", "leveldb-node3"}) {
|
||||
ElectingLevelDBStore store = createStoreNode();
|
||||
store.setDirectory(new File(data_dir(), dir));
|
||||
stores.add(store);
|
||||
asyncStart(store);
|
||||
}
|
||||
|
||||
LOG.info("Waiting 30s for stores to start");
|
||||
Thread.sleep(30 * 1000);
|
||||
|
||||
LOG.info("Checking for a single master");
|
||||
ElectingLevelDBStore master = null;
|
||||
for (ElectingLevelDBStore store: stores) {
|
||||
if (store.isMaster()) {
|
||||
assertNull(master);
|
||||
master = store;
|
||||
}
|
||||
}
|
||||
assertNotNull(master);
|
||||
|
||||
LOG.info("Imposing 1s I/O wait on Zookeeper connections, waiting 30s to confirm that quorum is not lost");
|
||||
this.connector.testHandle.setIOWaitMillis(1 * 1000, 30 * 1000);
|
||||
|
||||
LOG.info("Confirming that the quorum has not been lost");
|
||||
for (ElectingLevelDBStore store: stores) {
|
||||
if (store.isMaster()) {
|
||||
assertTrue(master == store);
|
||||
}
|
||||
}
|
||||
|
||||
LOG.info("Imposing 11s I/O wait on Zookeeper connections, waiting 30s for quorum to be lost");
|
||||
this.connector.testHandle.setIOWaitMillis(11 * 1000, 30 * 1000);
|
||||
|
||||
LOG.info("Confirming that the quorum has been lost");
|
||||
for (ElectingLevelDBStore store: stores) {
|
||||
assertFalse(store.isMaster());
|
||||
}
|
||||
master = null;
|
||||
|
||||
LOG.info("Lifting I/O wait on Zookeeper connections, waiting 30s for quorum to be re-established");
|
||||
this.connector.testHandle.setIOWaitMillis(0, 30 * 1000);
|
||||
|
||||
LOG.info("Checking for a single master");
|
||||
for (ElectingLevelDBStore store: stores) {
|
||||
if (store.isMaster()) {
|
||||
assertNull(master);
|
||||
master = store;
|
||||
}
|
||||
}
|
||||
assertNotNull(master);
|
||||
}
|
||||
|
||||
@After
|
||||
public void stop() throws Exception {
|
||||
if (master != null) {
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.activemq.leveldb.test;
|
|||
import org.apache.activemq.leveldb.CountDownFuture;
|
||||
import org.apache.activemq.leveldb.util.FileSupport;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.zookeeper.server.NIOServerCnxnFactory;
|
||||
import org.apache.zookeeper.server.TestServerCnxnFactory;
|
||||
import org.apache.zookeeper.server.ZooKeeperServer;
|
||||
import org.apache.zookeeper.server.persistence.FileTxnSnapLog;
|
||||
import org.junit.After;
|
||||
|
@ -34,7 +34,7 @@ import java.util.concurrent.TimeUnit;
|
|||
*/
|
||||
public class ZooKeeperTestSupport {
|
||||
|
||||
protected NIOServerCnxnFactory connector;
|
||||
protected TestServerCnxnFactory connector;
|
||||
|
||||
static File data_dir() {
|
||||
return new File("target/activemq-data/leveldb-elections");
|
||||
|
@ -49,7 +49,7 @@ public class ZooKeeperTestSupport {
|
|||
ZooKeeperServer zk_server = new ZooKeeperServer();
|
||||
zk_server.setTickTime(500);
|
||||
zk_server.setTxnLogFactory(new FileTxnSnapLog(new File(data_dir(), "zk-log"), new File(data_dir(), "zk-data")));
|
||||
connector = new NIOServerCnxnFactory();
|
||||
connector = new TestServerCnxnFactory();
|
||||
connector.configure(new InetSocketAddress(0), 100);
|
||||
connector.startup(zk_server);
|
||||
System.out.println("ZooKeeper started");
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.zookeeper.server;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.channels.SelectionKey;
|
||||
import java.nio.channels.SocketChannel;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/*
|
||||
* TestServerCnxnFactory allows a caller to impose an artifical
|
||||
* wait on I/O over the ServerCnxn used to communicate with the
|
||||
* ZooKeeper server.
|
||||
*/
|
||||
public class TestServerCnxnFactory extends NIOServerCnxnFactory {
|
||||
protected static final Logger LOG = LoggerFactory.getLogger(TestServerCnxnFactory.class);
|
||||
|
||||
/* testHandle controls whehter or not an artifical wait
|
||||
* is imposed when talking to the ZooKeeper server
|
||||
*/
|
||||
public TestHandle testHandle = new TestHandle();
|
||||
|
||||
public TestServerCnxnFactory() throws IOException {
|
||||
super();
|
||||
}
|
||||
|
||||
protected NIOServerCnxn createConnection(SocketChannel sock, SelectionKey sk) throws IOException {
|
||||
return new TestServerCnxn(this.zkServer, sock, sk, this, testHandle);
|
||||
}
|
||||
|
||||
/*
|
||||
* TestHandle is handed to TestServerCnxn and is used to
|
||||
* control the amount of time the TestServerCnxn waits
|
||||
* before allowing an I/O operation.
|
||||
*/
|
||||
public class TestHandle {
|
||||
private Object mu = new Object();
|
||||
private int ioWaitMillis = 0;
|
||||
|
||||
/*
|
||||
* Set an artifical I/O wait (in milliseconds) on ServerCnxn and
|
||||
* then sleep for the specified number of milliseconds.
|
||||
*/
|
||||
public void setIOWaitMillis(int ioWaitMillis, int sleepMillis) {
|
||||
synchronized(mu) {
|
||||
this.ioWaitMillis = ioWaitMillis;
|
||||
}
|
||||
if (sleepMillis > 0) {
|
||||
try {
|
||||
Thread.sleep(sleepMillis);
|
||||
} catch (InterruptedException e) {}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the number of milliseconds to wait before
|
||||
* allowing ServerCnxn to perform I/O.
|
||||
*/
|
||||
public int getIOWaitMillis() {
|
||||
synchronized(mu) {
|
||||
return this.ioWaitMillis;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public class TestServerCnxn extends NIOServerCnxn {
|
||||
public TestHandle testHandle;
|
||||
|
||||
public TestServerCnxn(ZooKeeperServer zk, SocketChannel sock, SelectionKey sk, NIOServerCnxnFactory factory, TestHandle testHandle) throws IOException {
|
||||
super(zk, sock, sk, factory);
|
||||
this.testHandle = testHandle;
|
||||
}
|
||||
|
||||
public void doIO(SelectionKey k) throws InterruptedException {
|
||||
final int millis = this.testHandle.getIOWaitMillis();
|
||||
if (millis > 0) {
|
||||
LOG.info("imposing a "+millis+" millisecond wait on ServerCxn: "+this);
|
||||
try {
|
||||
Thread.sleep(millis);
|
||||
} catch (InterruptedException e) {}
|
||||
}
|
||||
super.doIO(k);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue