mirror of https://github.com/apache/activemq.git
AMQ-2405: Adding more robust/graceful disk corruption recovery logic.
git-svn-id: https://svn.apache.org/repos/asf/activemq/trunk@818155 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
785454a366
commit
c9500f45ef
|
@ -349,4 +349,19 @@ public class KahaDBPersistenceAdapter implements PersistenceAdapter {
|
||||||
this.letter.setIgnoreMissingJournalfiles(ignoreMissingJournalfiles);
|
this.letter.setIgnoreMissingJournalfiles(ignoreMissingJournalfiles);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isChecksumJournalFiles() {
|
||||||
|
return letter.isChecksumJournalFiles();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isCheckForCorruptJournalFiles() {
|
||||||
|
return letter.isCheckForCorruptJournalFiles();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setChecksumJournalFiles(boolean checksumJournalFiles) {
|
||||||
|
letter.setChecksumJournalFiles(checksumJournalFiles);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCheckForCorruptJournalFiles(boolean checkForCorruptJournalFiles) {
|
||||||
|
letter.setCheckForCorruptJournalFiles(checkForCorruptJournalFiles);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,16 +22,7 @@ import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Date;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.LinkedHashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.SortedSet;
|
|
||||||
import java.util.TreeMap;
|
|
||||||
import java.util.TreeSet;
|
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
|
@ -60,6 +51,7 @@ import org.apache.kahadb.index.BTreeIndex;
|
||||||
import org.apache.kahadb.index.BTreeVisitor;
|
import org.apache.kahadb.index.BTreeVisitor;
|
||||||
import org.apache.kahadb.journal.Journal;
|
import org.apache.kahadb.journal.Journal;
|
||||||
import org.apache.kahadb.journal.Location;
|
import org.apache.kahadb.journal.Location;
|
||||||
|
import org.apache.kahadb.journal.DataFile;
|
||||||
import org.apache.kahadb.page.Page;
|
import org.apache.kahadb.page.Page;
|
||||||
import org.apache.kahadb.page.PageFile;
|
import org.apache.kahadb.page.PageFile;
|
||||||
import org.apache.kahadb.page.Transaction;
|
import org.apache.kahadb.page.Transaction;
|
||||||
|
@ -151,6 +143,8 @@ public class MessageDatabase {
|
||||||
private LockFile lockFile;
|
private LockFile lockFile;
|
||||||
private boolean ignoreMissingJournalfiles = false;
|
private boolean ignoreMissingJournalfiles = false;
|
||||||
private int indexCacheSize = 100;
|
private int indexCacheSize = 100;
|
||||||
|
private boolean checkForCorruptJournalFiles = false;
|
||||||
|
private boolean checksumJournalFiles = false;
|
||||||
|
|
||||||
public MessageDatabase() {
|
public MessageDatabase() {
|
||||||
}
|
}
|
||||||
|
@ -406,6 +400,15 @@ public class MessageDatabase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
long end = System.currentTimeMillis();
|
||||||
|
if( undoCounter > 0 ) {
|
||||||
|
// The rolledback operations are basically in flight journal writes. To avoid getting these the end user
|
||||||
|
// should do sync writes to the journal.
|
||||||
|
LOG.info("Rolled back " + undoCounter + " messages from the index in " + ((end - start) / 1000.0f) + " seconds.");
|
||||||
|
}
|
||||||
|
|
||||||
|
undoCounter = 0;
|
||||||
|
start = System.currentTimeMillis();
|
||||||
|
|
||||||
// Lets be extra paranoid here and verify that all the datafiles being referenced
|
// Lets be extra paranoid here and verify that all the datafiles being referenced
|
||||||
// by the indexes still exists.
|
// by the indexes still exists.
|
||||||
|
@ -445,40 +448,64 @@ public class MessageDatabase {
|
||||||
missingJournalFiles.removeAll( journal.getFileMap().keySet() );
|
missingJournalFiles.removeAll( journal.getFileMap().keySet() );
|
||||||
|
|
||||||
if( !missingJournalFiles.isEmpty() ) {
|
if( !missingJournalFiles.isEmpty() ) {
|
||||||
if( ignoreMissingJournalfiles ) {
|
LOG.info("Some journal files are missing: "+missingJournalFiles);
|
||||||
|
}
|
||||||
|
|
||||||
for (StoredDestination sd : storedDestinations.values()) {
|
ArrayList<BTreeVisitor.Predicate<Location>> missingPredicates = new ArrayList<BTreeVisitor.Predicate<Location>>();
|
||||||
|
for (Integer missing : missingJournalFiles) {
|
||||||
|
missingPredicates.add(new BTreeVisitor.BetweenVisitor<Location, Long>(new Location(missing,0), new Location(missing+1,0)));
|
||||||
|
}
|
||||||
|
|
||||||
final ArrayList<Long> matches = new ArrayList<Long>();
|
if ( checkForCorruptJournalFiles ) {
|
||||||
for (Integer missing : missingJournalFiles) {
|
Collection<DataFile> dataFiles = journal.getFileMap().values();
|
||||||
sd.locationIndex.visit(tx, new BTreeVisitor.BetweenVisitor<Location, Long>(new Location(missing,0), new Location(missing+1,0)) {
|
for (DataFile dataFile : dataFiles) {
|
||||||
@Override
|
int id = dataFile.getDataFileId();
|
||||||
protected void matched(Location key, Long value) {
|
missingPredicates.add(new BTreeVisitor.BetweenVisitor<Location, Long>(new Location(id,dataFile.getLength()), new Location(id+1,0)));
|
||||||
matches.add(value);
|
Sequence seq = dataFile.getCorruptedBlocks().getHead();
|
||||||
}
|
while( seq!=null ) {
|
||||||
});
|
missingPredicates.add(new BTreeVisitor.BetweenVisitor<Location, Long>(new Location(id, (int) seq.getFirst()), new Location(id, (int) seq.getLast()+1)));
|
||||||
}
|
seq = seq.getNext();
|
||||||
|
|
||||||
|
|
||||||
for (Long sequenceId : matches) {
|
|
||||||
MessageKeys keys = sd.orderIndex.remove(tx, sequenceId);
|
|
||||||
sd.locationIndex.remove(tx, keys.location);
|
|
||||||
sd.messageIdIndex.remove(tx, keys.messageId);
|
|
||||||
undoCounter++;
|
|
||||||
// TODO: do we need to modify the ack positions for the pub sub case?
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
|
||||||
throw new IOException("Detected missing journal files: "+missingJournalFiles);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
long end = System.currentTimeMillis();
|
if( !missingPredicates.isEmpty() ) {
|
||||||
|
for (StoredDestination sd : storedDestinations.values()) {
|
||||||
|
|
||||||
|
final ArrayList<Long> matches = new ArrayList<Long>();
|
||||||
|
sd.locationIndex.visit(tx, new BTreeVisitor.OrVisitor<Location, Long>(missingPredicates) {
|
||||||
|
protected void matched(Location key, Long value) {
|
||||||
|
matches.add(value);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// If somes message references are affected by the missing data files...
|
||||||
|
if( !matches.isEmpty() ) {
|
||||||
|
|
||||||
|
// We either 'gracefully' recover dropping the missing messages or
|
||||||
|
// we error out.
|
||||||
|
if( ignoreMissingJournalfiles ) {
|
||||||
|
// Update the index to remove the references to the missing data
|
||||||
|
for (Long sequenceId : matches) {
|
||||||
|
MessageKeys keys = sd.orderIndex.remove(tx, sequenceId);
|
||||||
|
sd.locationIndex.remove(tx, keys.location);
|
||||||
|
sd.messageIdIndex.remove(tx, keys.messageId);
|
||||||
|
undoCounter++;
|
||||||
|
// TODO: do we need to modify the ack positions for the pub sub case?
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
throw new IOException("Detected missing/corrupt journal files. "+matches.size()+" messages affected.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
end = System.currentTimeMillis();
|
||||||
if( undoCounter > 0 ) {
|
if( undoCounter > 0 ) {
|
||||||
// The rolledback operations are basically in flight journal writes. To avoid getting these the end user
|
// The rolledback operations are basically in flight journal writes. To avoid getting these the end user
|
||||||
// should do sync writes to the journal.
|
// should do sync writes to the journal.
|
||||||
LOG.info("Rolled back " + undoCounter + " operations from the index in " + ((end - start) / 1000.0f) + " seconds.");
|
LOG.info("Detected missing/corrupt journal files. Dropped " + undoCounter + " messages from the index in " + ((end - start) / 1000.0f) + " seconds.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1339,6 +1366,8 @@ public class MessageDatabase {
|
||||||
Journal manager = new Journal();
|
Journal manager = new Journal();
|
||||||
manager.setDirectory(directory);
|
manager.setDirectory(directory);
|
||||||
manager.setMaxFileLength(getJournalMaxFileLength());
|
manager.setMaxFileLength(getJournalMaxFileLength());
|
||||||
|
manager.setCheckForCorruptionOnStartup(checkForCorruptJournalFiles);
|
||||||
|
manager.setChecksum(checksumJournalFiles || checkForCorruptJournalFiles);
|
||||||
manager.setWriteBatchSize(getJournalMaxWriteBatchSize());
|
manager.setWriteBatchSize(getJournalMaxWriteBatchSize());
|
||||||
return manager;
|
return manager;
|
||||||
}
|
}
|
||||||
|
@ -1452,4 +1481,20 @@ public class MessageDatabase {
|
||||||
public void setIndexCacheSize(int indexCacheSize) {
|
public void setIndexCacheSize(int indexCacheSize) {
|
||||||
this.indexCacheSize = indexCacheSize;
|
this.indexCacheSize = indexCacheSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isCheckForCorruptJournalFiles() {
|
||||||
|
return checkForCorruptJournalFiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCheckForCorruptJournalFiles(boolean checkForCorruptJournalFiles) {
|
||||||
|
this.checkForCorruptJournalFiles = checkForCorruptJournalFiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isChecksumJournalFiles() {
|
||||||
|
return checksumJournalFiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setChecksumJournalFiles(boolean checksumJournalFiles) {
|
||||||
|
this.checksumJournalFiles = checksumJournalFiles;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue