https://issues.apache.org/jira/browse/AMQ-5703 - fix and test. We now skip past known corruption on a journal replay

This commit is contained in:
gtully 2015-04-01 14:50:12 +01:00
parent 6df02555fd
commit a7178a46b7
4 changed files with 314 additions and 3 deletions

View File

@ -601,7 +601,7 @@ public abstract class MessageDatabase extends ServiceSupport implements BrokerSe
if (recoveryPosition != null) {
int redoCounter = 0;
LOG.info("Recovering from the journal ...");
LOG.info("Recovering from the journal @" + recoveryPosition);
while (recoveryPosition != null) {
JournalCommand<?> message = load(recoveryPosition);
metadata.lastUpdate = recoveryPosition;

View File

@ -26,6 +26,7 @@ import java.util.concurrent.atomic.AtomicReference;
import java.util.zip.Adler32;
import java.util.zip.Checksum;
import org.apache.activemq.store.kahadb.disk.util.LinkedNode;
import org.apache.activemq.store.kahadb.disk.util.SequenceSet;
import org.apache.activemq.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -623,8 +624,12 @@ public class Journal {
accessorPool.closeDataFileAccessor(reader);
}
if (cur.getType() == 0) {
// invalid offset - jump to next datafile
Sequence corruptedRange = dataFile.corruptedBlocks.get(cur.getOffset());
if (corruptedRange != null) {
// skip corruption
cur.setSize((int) corruptedRange.range());
} else if (cur.getType() == 0) {
// eof - jump to next datafile
cur.setOffset(maxFileLength);
} else if (cur.getType() == USER_RECORD_TYPE) {
// Only return user records.

View File

@ -352,6 +352,20 @@ public class SequenceSet extends LinkedNodeList<Sequence> implements Iterable<Lo
return false;
}
/**
 * Locates the Sequence in this set whose range contains the given value.
 *
 * @param value the value to look up
 * @return the containing Sequence, or null when no contained sequence holds the value
 */
public Sequence get(int value) {
    if (isEmpty()) {
        return null;
    }
    // Linear walk of the linked sequences; ranges are disjoint so the first hit wins.
    for (Sequence candidate = getHead(); candidate != null; candidate = candidate.getNext()) {
        if (candidate.contains(value)) {
            return candidate;
        }
    }
    return null;
}
/**
* Computes the size of this Sequence by summing the values of all
* the contained sequences.

View File

@ -0,0 +1,292 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.store.kahadb;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import javax.jms.Connection;
import javax.jms.Destination;
import javax.jms.Message;
import javax.jms.MessageConsumer;
import javax.jms.MessageProducer;
import javax.jms.Session;
import org.apache.activemq.ActiveMQConnectionFactory;
import org.apache.activemq.broker.BrokerService;
import org.apache.activemq.command.ActiveMQQueue;
import org.apache.activemq.store.kahadb.disk.journal.DataFile;
import org.apache.activemq.store.kahadb.disk.journal.Journal;
import org.apache.activemq.util.ByteSequence;
import org.apache.activemq.util.RecoverableRandomAccessFile;
import org.junit.After;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Verifies AMQ-5703: after journal batch records are deliberately corrupted and the
 * index (db.data) is deleted, a broker restart replays the journal, skips the known
 * corrupted ranges, and recovers all but the destroyed messages.
 *
 * Parameterized on the fill byte used to overwrite the batch control header, so the
 * corruption is exercised with both valid and invalid record-type values.
 */
@RunWith(Parameterized.class)
public class JournalCorruptionIndexRecoveryTest {

    private static final Logger LOG = LoggerFactory.getLogger(JournalCorruptionIndexRecoveryTest.class);

    ActiveMQConnectionFactory cf = null;
    BrokerService broker = null;
    private final Destination destination = new ActiveMQQueue("Test");
    private String connectionUri;
    private KahaDBPersistenceAdapter adapter;

    // Byte used to overwrite the batch control record header.
    @Parameterized.Parameter(0)
    public byte fill = Byte.valueOf("3");

    @Parameterized.Parameters(name = "fill=#{0}")
    public static Iterable<Object[]> parameters() {
        // corruption can be valid record type values
        return Arrays.asList(new Object[][]{{Byte.valueOf("1")}, {Byte.valueOf("0")}, {Byte.valueOf("2")}, {Byte.valueOf("-1")} });
    }

    /** Starts a fresh broker, deleting any prior persistent state. */
    protected void startBroker() throws Exception {
        doStartBroker(true);
    }

    /**
     * Stops the running broker, deletes the index so restart forces a full journal
     * replay, then starts a broker over the surviving (corrupted) journal files.
     */
    protected void restartBroker() throws Exception {
        // Capture the data directory inside the guard: the original dereferenced
        // broker before the null check, which made the check useless.
        File dataDir = null;
        if (broker != null) {
            dataDir = broker.getPersistenceAdapter().getDirectory();
            broker.stop();
            broker.waitUntilStopped();
        }
        if (dataDir != null) {
            whackIndex(dataDir);
        }
        doStartBroker(false);
    }

    /**
     * Creates and starts the broker.
     *
     * @param delete when true, all existing messages/state are removed on startup
     */
    private void doStartBroker(boolean delete) throws Exception {
        broker = new BrokerService();
        broker.setDeleteAllMessagesOnStartup(delete);
        broker.setPersistent(true);
        broker.setUseJmx(true);
        broker.addConnector("tcp://localhost:0");
        configurePersistence(broker);

        connectionUri = "vm://localhost?create=false";
        cf = new ActiveMQConnectionFactory(connectionUri);

        broker.start();
        LOG.info("Starting broker..");
    }

    /** Tunes KahaDB so the test produces several small journal files quickly. */
    protected void configurePersistence(BrokerService brokerService) throws Exception {
        adapter = (KahaDBPersistenceAdapter) brokerService.getPersistenceAdapter();

        // ensure there are a bunch of data files but multiple entries in each
        adapter.setJournalMaxFileLength(1024 * 20);

        // speed up the test case, checkpoint an cleanup early and often
        adapter.setCheckpointInterval(5000);
        adapter.setCleanupInterval(5000);

        adapter.setCheckForCorruptJournalFiles(true);
        adapter.setIgnoreMissingJournalfiles(true);
    }

    @After
    public void tearDown() throws Exception {
        if (broker != null) {
            broker.stop();
            broker.waitUntilStopped();
        }
    }

    @Test
    public void testRecoveryAfterCorruptionMiddle() throws Exception {
        startBroker();

        produceMessagesToConsumeMultipleDataFiles(50);

        int numFiles = getNumberOfJournalFiles();
        assertTrue("more than x files: " + numFiles, numFiles > 4);

        corruptBatchMiddle(3);

        restartBroker();

        assertEquals("missing one message", 49, broker.getAdminView().getTotalMessageCount());
        assertEquals("Drain", 49, drainQueue(49));
    }

    @Test
    public void testRecoveryAfterCorruptionEnd() throws Exception {
        startBroker();

        produceMessagesToConsumeMultipleDataFiles(50);

        int numFiles = getNumberOfJournalFiles();
        assertTrue("more than x files: " + numFiles, numFiles > 4);

        corruptBatchEnd(4);

        restartBroker();

        assertEquals("missing one message", 49, broker.getAdminView().getTotalMessageCount());
        assertEquals("Drain", 49, drainQueue(49));
    }

    @Test
    public void testRecoveryAfterCorruption() throws Exception {
        startBroker();

        produceMessagesToConsumeMultipleDataFiles(50);

        int numFiles = getNumberOfJournalFiles();
        assertTrue("more than x files: " + numFiles, numFiles > 4);

        corruptBatchMiddle(3);
        corruptBatchEnd(4);

        restartBroker();

        assertEquals("missing one message", 48, broker.getAdminView().getTotalMessageCount());
        assertEquals("Drain", 48, drainQueue(48));
    }

    /** Deletes the KahaDB index file so the next start replays the journal. */
    private void whackIndex(File dataDir) {
        File indexToDelete = new File(dataDir, "db.data");
        LOG.info("Whacking index: " + indexToDelete);
        indexToDelete.delete();
    }

    private void corruptBatchMiddle(int i) throws IOException {
        corruptBatch(i, false);
    }

    private void corruptBatchEnd(int i) throws IOException {
        corruptBatch(i, true);
    }

    /**
     * Overwrites a batch control record header in journal data file {@code id}
     * with the parameterized fill byte, destroying that batch.
     *
     * @param id    index of the journal data file to corrupt
     * @param atEnd when true the last batch header in the file is corrupted,
     *              otherwise the third one
     */
    private void corruptBatch(int id, boolean atEnd) throws IOException {
        Collection<DataFile> files =
                ((KahaDBPersistenceAdapter) broker.getPersistenceAdapter()).getStore().getJournal().getFileMap().values();

        DataFile dataFile = (DataFile) files.toArray()[id];
        RecoverableRandomAccessFile randomAccessFile = dataFile.openRandomAccessFile();
        try {
            final ByteSequence header = new ByteSequence(Journal.BATCH_CONTROL_RECORD_HEADER);
            byte data[] = new byte[1024 * 20];
            ByteSequence bs = new ByteSequence(data, 0, randomAccessFile.read(data, 0, data.length));

            int pos = 0;
            int offset = 0;
            int end = atEnd ? Integer.MAX_VALUE : 3;
            for (int i = 0; i < end; i++) {
                int found = bs.indexOf(header, pos);
                if (found == -1) {
                    break;
                }
                offset = found;
                // advance past the current match; the original pos++ kept
                // re-finding the same occurrence on subsequent iterations
                pos = found + 1;
            }

            LOG.info("Whacking batch record in file:" + id + ", at offset: " + offset + " with fill:" + fill);
            // whack that record
            byte[] bla = new byte[Journal.BATCH_CONTROL_RECORD_HEADER.length];
            Arrays.fill(bla, fill);
            randomAccessFile.seek(offset);
            randomAccessFile.write(bla, 0, bla.length);
        } finally {
            // the original leaked this handle
            randomAccessFile.close();
        }
    }

    /** Counts the non-null journal data files currently known to the store. */
    private int getNumberOfJournalFiles() throws IOException {
        Collection<DataFile> files =
                ((KahaDBPersistenceAdapter) broker.getPersistenceAdapter()).getStore().getJournal().getFileMap().values();
        int reality = 0;
        for (DataFile file : files) {
            if (file != null) {
                reality++;
            }
        }
        return reality;
    }

    /**
     * Sends {@code numToSend} persistent text messages over the TCP connector.
     *
     * @return number of messages successfully sent
     */
    private int produceMessages(Destination destination, int numToSend) throws Exception {
        int sent = 0;
        Connection connection = new ActiveMQConnectionFactory(
                broker.getTransportConnectors().get(0).getConnectUri()).createConnection();
        connection.start();
        try {
            Session session = connection.createSession(false, Session.AUTO_ACKNOWLEDGE);
            MessageProducer producer = session.createProducer(destination);
            for (int i = 0; i < numToSend; i++) {
                producer.send(createMessage(session, i));
                sent++;
            }
        } finally {
            connection.close();
        }
        return sent;
    }

    private int produceMessagesToConsumeMultipleDataFiles(int numToSend) throws Exception {
        return produceMessages(destination, numToSend);
    }

    // 1KB payload so ~20KB journal files roll over after a handful of messages.
    final String payload = new String(new byte[1024]);

    private Message createMessage(Session session, int i) throws Exception {
        return session.createTextMessage(payload + "::" + i);
    }

    /**
     * Consumes up to {@code max} messages from the test queue.
     *
     * @return number of messages actually received before a 5s receive timeout
     */
    private int drainQueue(int max) throws Exception {
        Connection connection = cf.createConnection();
        // try/finally so a receive failure cannot leak the connection
        try {
            connection.start();
            Session session = connection.createSession(false, Session.AUTO_ACKNOWLEDGE);
            MessageConsumer consumer = session.createConsumer(destination);
            int count = 0;
            while (count < max && consumer.receive(5000) != null) {
                count++;
            }
            consumer.close();
            return count;
        } finally {
            connection.close();
        }
    }
}