LUCENE-702: make sure addIndexes(*) does not corrupt index on disk full; change to transactional semantics; fix IndexWriter.mergeSegments and IndexReader.commit to clean up (and leave instance consistent) on exception

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@488330 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2006-12-18 16:45:29 +00:00
parent 0d8f7b704a
commit d6823ef374
16 changed files with 1540 additions and 178 deletions

View File

@ -240,6 +240,15 @@ Bug fixes
"The handle is invalid" IOExceptions on Windows when trying to "The handle is invalid" IOExceptions on Windows when trying to
close readers or writers. (Michael Busch via Mike McCandless). close readers or writers. (Michael Busch via Mike McCandless).
26. LUCENE-702: Fix IndexWriter.addIndexes(*) to not corrupt the index
on any exceptions (eg disk full). The semantics of these methods
is now transactional: either all indices are merged or none are.
Also fixed IndexWriter.mergeSegments (called outside of
addIndexes(*) by addDocument, optimize, flushRamSegments) and
IndexReader.commit() (called by close) to clean up and keep the
instance state consistent to what's actually in the index (Mike
McCandless).
Optimizations
1. LUCENE-586: TermDocs.skipTo() is now more efficient for
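To make the guarantee in item 26 concrete from the caller's side, here is a minimal sketch. The directory paths and analyzer are invented for the example; the IndexWriter constructor and addIndexes(Directory[]) calls are the public API touched by this patch.

import java.io.IOException;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

class AddIndexesExample {
  // Merge two part indexes into a main index; with LUCENE-702 either all of
  // them are merged or the main index is left exactly as it was.
  static void mergeParts() throws IOException {
    Directory main  = FSDirectory.getDirectory("/tmp/main-index", false);  // hypothetical paths
    Directory part1 = FSDirectory.getDirectory("/tmp/part1", false);
    Directory part2 = FSDirectory.getDirectory("/tmp/part2", false);
    IndexWriter writer = new IndexWriter(main, new WhitespaceAnalyzer(), false);
    try {
      writer.addIndexes(new Directory[] { part1, part2 });
    } catch (IOException e) {
      // e.g. disk full: the index is not corrupted and contains either all of
      // the added documents or none of them (one segment may be left non-CFS).
      throw e;
    } finally {
      writer.close();
    }
  }
}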

View File

@ -26,6 +26,8 @@ import java.io.IOException;
import java.io.PrintStream;
import java.util.Vector;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
/**
* A utility class (used by both IndexReader and
@ -35,7 +37,7 @@ import java.util.HashMap;
*/
public class IndexFileDeleter {
private Vector deletable;
private Vector pending; private HashSet pending;
private Directory directory;
private SegmentInfos segmentInfos;
private PrintStream infoStream;
@ -45,6 +47,12 @@ public class IndexFileDeleter {
this.segmentInfos = segmentInfos;
this.directory = directory;
}
void setSegmentInfos(SegmentInfos segmentInfos) {
this.segmentInfos = segmentInfos;
}
SegmentInfos getSegmentInfos() {
return segmentInfos;
}
void setInfoStream(PrintStream infoStream) {
this.infoStream = infoStream;
@ -134,6 +142,10 @@ public class IndexFileDeleter {
// This is an orphan'd separate norms file:
doDelete = true;
}
} else if ("cfs".equals(extension) && !info.getUseCompoundFile()) {
// This is a partially written
// _segmentName.cfs:
doDelete = true;
}
}
}
@ -167,6 +179,30 @@ public class IndexFileDeleter {
deleteFiles(reader.files(), reader.directory()); // delete other files
}
}
/**
* Delete these segments, as long as they are not listed
* in protectedSegments. If they are, then, instead, add
* them to the pending set.
*/
public final void deleteSegments(Vector segments, HashSet protectedSegments) throws IOException {
deleteFiles(); // try to delete files that we couldn't before
for (int i = 0; i < segments.size(); i++) {
SegmentReader reader = (SegmentReader)segments.elementAt(i);
if (reader.directory() == this.directory) {
if (protectedSegments.contains(reader.getSegmentName())) {
addPendingFiles(reader.files()); // record these for deletion on commit
} else {
deleteFiles(reader.files()); // try to delete our files
}
} else {
deleteFiles(reader.files(), reader.directory()); // delete other files
}
}
}
public final void deleteFiles(Vector files, Directory directory)
throws IOException {
@ -199,22 +235,51 @@ public class IndexFileDeleter {
pending = null;
}
final void addPendingFile(String fileName) { /*
if (pending == null) { Record that the files for these segments should be
pending = new Vector(); deleted, once the pending deletes are committed.
*/
final void addPendingSegments(Vector segments) throws IOException {
for (int i = 0; i < segments.size(); i++) {
SegmentReader reader = (SegmentReader)segments.elementAt(i);
if (reader.directory() == this.directory) {
addPendingFiles(reader.files());
}
} }
pending.addElement(fileName);
} }
final void commitPendingFiles() { /*
Record list of files for deletion, but do not delete
them until commitPendingFiles is called.
*/
final void addPendingFiles(Vector files) {
for(int i=0;i<files.size();i++) {
addPendingFile((String) files.elementAt(i));
}
}
/*
Record a file for deletion, but do not delete it until
commitPendingFiles is called.
*/
final void addPendingFile(String fileName) {
if (pending == null) {
pending = new HashSet();
}
pending.add(fileName);
}
final void commitPendingFiles() throws IOException {
if (pending != null) { if (pending != null) {
if (deletable == null) { if (deletable == null) {
deletable = pending; deletable = new Vector();
pending = null;
} else {
deletable.addAll(pending);
pending = null;
} }
Iterator it = pending.iterator();
while(it.hasNext()) {
deletable.addElement(it.next());
}
pending = null;
deleteFiles();
}
}
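The pending-delete machinery above amounts to: queue file names instead of deleting them, and only delete once the commit succeeds. A stand-alone sketch of that idea, with names and types simplified (the real IndexFileDeleter works against a Directory, retries previously undeletable files, and is driven by IndexWriter and IndexReader):

import java.io.File;
import java.util.HashSet;
import java.util.Iterator;

class PendingDeletesSketch {
  private HashSet pending = new HashSet();   // file names queued for deletion

  void addPendingFile(String fileName) {     // record it, but do not touch the disk yet
    pending.add(fileName);
  }

  void commitPendingFiles(File dir) {        // commit succeeded: now actually delete
    for (Iterator it = pending.iterator(); it.hasNext();) {
      new File(dir, (String) it.next()).delete();
    }
    pending.clear();
  }

  void clearPendingFiles() {                 // commit failed: the files are still needed
    pending.clear();
  }
}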

View File

@ -120,6 +120,10 @@ public abstract class IndexReader {
private boolean stale;
private boolean hasChanges;
/** Used by commit() to record pre-commit state in case
* rollback is necessary */
private boolean rollbackHasChanges;
private SegmentInfos rollbackSegmentInfos;
/** Returns an IndexReader reading the index in an FSDirectory in the named
path. */
@ -584,7 +588,43 @@ public abstract class IndexReader {
protected abstract void doUndeleteAll() throws IOException;
/**
* Commit changes resulting from delete, undeleteAll, or setNorm operations * Should internally checkpoint state that will change
* during commit so that we can rollback if necessary.
*/
void startCommit() {
if (directoryOwner) {
rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
}
rollbackHasChanges = hasChanges;
}
/**
* Rolls back state to just before the commit (this is
* called by commit() if there is some exception while
* committing).
*/
void rollbackCommit() {
if (directoryOwner) {
for(int i=0;i<segmentInfos.size();i++) {
// Rollback each segmentInfo. Because the
// SegmentReader holds a reference to the
// SegmentInfo we can't [easily] just replace
// segmentInfos, so we reset it in place instead:
segmentInfos.info(i).reset(rollbackSegmentInfos.info(i));
}
rollbackSegmentInfos = null;
}
hasChanges = rollbackHasChanges;
}
/**
* Commit changes resulting from delete, undeleteAll, or
* setNorm operations
*
* If an exception is hit, then either no changes or all
* changes will have been committed to the index
* (transactional semantics).
*
* @throws IOException
*/
@ -597,15 +637,53 @@ public abstract class IndexReader {
deleter.deleteFiles();
}
if(directoryOwner){
deleter.clearPendingFiles();
doCommit();
String oldInfoFileName = segmentInfos.getCurrentSegmentFileName();
segmentInfos.write(directory);
// Attempt to delete all files we just obsoleted:
// Should not be necessary: no prior commit should
// have left pending files, so just defensive:
deleter.clearPendingFiles();
String oldInfoFileName = segmentInfos.getCurrentSegmentFileName();
String nextSegmentsFileName = segmentInfos.getNextSegmentFileName();
// Checkpoint the state we are about to change, in
// case we have to roll back:
startCommit();
boolean success = false;
try {
doCommit();
segmentInfos.write(directory);
success = true;
} finally {
if (!success) {
// Rollback changes that were made to
// SegmentInfos but failed to get [fully]
// committed. This way this reader instance
// remains consistent (matched to what's
// actually in the index):
rollbackCommit();
// Erase any pending files that we were going to delete:
deleter.clearPendingFiles();
// Remove possibly partially written next
// segments file:
deleter.deleteFile(nextSegmentsFileName);
// Recompute deletable files & remove them (so
// partially written .del files, etc, are
// removed):
deleter.findDeletableFiles();
deleter.deleteFiles();
}
}
// Attempt to delete all files we just obsoleted:
deleter.deleteFile(oldInfoFileName);
deleter.commitPendingFiles();
deleter.deleteFiles();
if (writeLock != null) {
writeLock.release(); // release write lock
writeLock = null;
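Seen from application code, the checkpoint/rollback added above means a failed commit leaves both the reader instance and the on-disk index consistent. A hedged sketch (the setup is invented; the reader calls are the same public API exercised by the new testDiskFull test below):

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;

class ReaderCommitExample {
  // 'dir' is assumed to already hold an index with at least one document.
  static void deleteFirstDoc(Directory dir) throws IOException {
    IndexReader reader = IndexReader.open(dir);
    reader.deleteDocument(0);
    try {
      reader.close();            // close() commits the buffered deletion
    } catch (IOException e) {
      // e.g. disk full during commit: with this change the deletion is rolled
      // back, partial files are cleaned up, and a fresh IndexReader.open(dir)
      // still sees a consistent index.
      throw e;
    }
  }
}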

View File

@ -31,7 +31,7 @@ import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Vector;
import java.util.HashSet;
/**
An IndexWriter creates and maintains an index.
@ -99,6 +99,11 @@ public class IndexWriter {
private Similarity similarity = Similarity.getDefault(); // how to normalize
private boolean inTransaction = false; // true iff we are in a transaction
private boolean commitPending; // true if segmentInfos has changes not yet committed
private HashSet protectedSegments; // segment names that should not be deleted until commit
private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
private SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory
private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs
@ -473,6 +478,12 @@ public class IndexWriter {
* Adds a document to this index. If the document contains more than
* {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
* discarded.
*
* Note that if an Exception is hit (eg disk full) then
* the index will be consistent, but this document will
* not have been added. Furthermore, it's possible the
* index will have one segment in non-compound format even
* when using compound files.
*/
public void addDocument(Document doc) throws IOException {
addDocument(doc, analyzer);
@ -483,6 +494,9 @@ public class IndexWriter {
* value of {@link #getAnalyzer()}. If the document contains more than
* {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
* discarded.
*
* See @link #addDocument(Document) for details on index
* state after an IOException.
*/
public void addDocument(Document doc, Analyzer analyzer) throws IOException {
DocumentWriter dw =
@ -563,8 +577,22 @@ public class IndexWriter {
*/
private PrintStream infoStream = null;
/** Merges all segments together into a single segment, optimizing an index /** Merges all segments together into a single segment,
for search. */ * optimizing an index for search..
*
* <p>Note that this requires temporary free space in the
* Directory up to the size of the starting index (exact
* usage could be less but will depend on many
* factors).</p>
* <p>If an Exception is hit during optimize() (eg, due to
* disk full), the index will not be corrupted. However
* it's possible that one of the segments in the index
* will be in non-CFS format even when using compound file
* format. This will occur when the Exception is hit
* during conversion of the segment into compound
* format.</p>
*/
public synchronized void optimize() throws IOException {
flushRamSegments();
while (segmentInfos.size() > 1 ||
@ -579,6 +607,85 @@ public class IndexWriter {
}
}
/*
* Begin a transaction. During a transaction, any segment
* merges that happen (or ram segments flushed) will not
* write a new segments file and will not remove any files
* that were present at the start of the transaction. You
* must make a matched (try/finally) call to
* commitTransaction() or rollbackTransaction() to finish
* the transaction.
*/
private void startTransaction() throws IOException {
if (inTransaction) {
throw new IOException("transaction is already in process");
}
rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
protectedSegments = new HashSet();
for(int i=0;i<segmentInfos.size();i++) {
SegmentInfo si = (SegmentInfo) segmentInfos.elementAt(i);
protectedSegments.add(si.name);
}
inTransaction = true;
}
/*
* Rolls back the transaction and restores state to where
* we were at the start.
*/
private void rollbackTransaction() throws IOException {
// Keep the same segmentInfos instance but replace all
// of its SegmentInfo instances. This is so the next
// attempt to commit using this instance of IndexWriter
// will always write to a new generation ("write once").
segmentInfos.clear();
segmentInfos.addAll(rollbackSegmentInfos);
// Ask deleter to locate unreferenced files & remove
// them:
deleter.clearPendingFiles();
deleter.findDeletableFiles();
deleter.deleteFiles();
clearTransaction();
}
/*
* Commits the transaction. This will write the new
* segments file and remove any pending deletions we have
* accumulated during the transaction
*/
private void commitTransaction() throws IOException {
if (commitPending) {
boolean success = false;
try {
// If we hit eg disk full during this write we have
// to rollback.:
segmentInfos.write(directory); // commit changes
success = true;
} finally {
if (!success) {
rollbackTransaction();
}
}
deleter.commitPendingFiles();
commitPending = false;
}
clearTransaction();
}
/* Should only be called by rollbackTransaction &
* commitTransaction */
private void clearTransaction() {
protectedSegments = null;
rollbackSegmentInfos = null;
inTransaction = false;
}
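The three helpers above are always used in the same bracketed pattern, which the addIndexes changes below repeat. A condensed stand-alone sketch of the idiom (segment bookkeeping reduced to a plain Vector; the real methods also track an inTransaction flag, protected segment names, and coordinate with IndexFileDeleter):

import java.util.Vector;

class TransactionIdiomSketch {
  private Vector segmentInfos = new Vector();   // stands in for SegmentInfos
  private Vector rollbackSegmentInfos;

  private void startTransaction() {
    rollbackSegmentInfos = (Vector) segmentInfos.clone();   // checkpoint current state
  }

  private void commitTransaction() {            // the real code writes segments_N here
    rollbackSegmentInfos = null;
  }

  private void rollbackTransaction() {          // restore the checkpoint in place
    segmentInfos.clear();
    segmentInfos.addAll(rollbackSegmentInfos);
    rollbackSegmentInfos = null;
  }

  public void addIndexes(Vector incomingSegments) {
    boolean success = false;
    startTransaction();
    try {
      segmentInfos.addAll(incomingSegments);    // the merge work happens here
      success = true;
    } finally {
      if (success) commitTransaction(); else rollbackTransaction();
    }
  }
}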
/** Merges all segments from an array of indexes into this index.
*
* <p>This may be used to parallelize batch indexing. A large document
@ -587,27 +694,68 @@ public class IndexWriter {
* complete index can then be created by merging sub-collection indexes
* with this method.
*
* <p>After this completes, the index is optimized. */ * <p>After this completes, the index is optimized.
*
* <p>This method is transactional in how Exceptions are
* handled: it does not commit a new segments_N file until
* all indexes are added. This means if an Exception
* occurs (eg disk full), then either no indexes will have
* been added or they all will have been.</p>
*
* <p>If an Exception is hit, it's still possible that all
* indexes were successfully added. This happens when the
* Exception is hit when trying to build a CFS file. In
* this case, one segment in the index will be in non-CFS
* format, even when using compound file format.</p>
*
* <p>Also note that on an Exception, the index may still
* have been partially or fully optimized even though none
* of the input indexes were added. </p>
*
* <p>Note that this requires temporary free space in the
* Directory up to 2X the sum of all input indexes
* (including the starting index). Exact usage could be
* less but will depend on many factors.</p>
*
* <p>See <a target="_top"
* href="http://issues.apache.org/jira/browse/LUCENE-702">LUCENE-702</a>
* for details.</p>
*/
public synchronized void addIndexes(Directory[] dirs)
throws IOException {
optimize(); // start with zero or 1 seg
int start = segmentInfos.size();
for (int i = 0; i < dirs.length; i++) { boolean success = false;
SegmentInfos sis = new SegmentInfos(); // read infos from dir
sis.read(dirs[i]);
for (int j = 0; j < sis.size(); j++) {
segmentInfos.addElement(sis.info(j)); // add each info
}
}
// merge newly added segments in log(n) passes startTransaction();
while (segmentInfos.size() > start+mergeFactor) {
for (int base = start; base < segmentInfos.size(); base++) { try {
int end = Math.min(segmentInfos.size(), base+mergeFactor); for (int i = 0; i < dirs.length; i++) {
if (end-base > 1) SegmentInfos sis = new SegmentInfos(); // read infos from dir
mergeSegments(segmentInfos, base, end); sis.read(dirs[i]);
for (int j = 0; j < sis.size(); j++) {
segmentInfos.addElement(sis.info(j)); // add each info
}
}
// merge newly added segments in log(n) passes
while (segmentInfos.size() > start+mergeFactor) {
for (int base = start; base < segmentInfos.size(); base++) {
int end = Math.min(segmentInfos.size(), base+mergeFactor);
if (end-base > 1) {
mergeSegments(segmentInfos, base, end);
}
}
}
success = true;
} finally {
if (success) {
commitTransaction();
} else {
rollbackTransaction();
} }
} }
@ -623,6 +771,11 @@ public class IndexWriter {
* <p>
* This requires this index not be among those to be added, and the
* upper bound* of those segment doc counts not exceed maxMergeDocs.
*
* <p>See {@link #addIndexes(Directory[])} for
* details on transactional semantics, temporary free
* space required in the Directory, and non-CFS segments
* on an Exception.</p>
*/
public synchronized void addIndexesNoOptimize(Directory[] dirs)
throws IOException {
@ -651,96 +804,114 @@ public class IndexWriter {
// and target may use compound file or not. So we use mergeSegments() to
// copy a segment, which may cause doc count to change because deleted
// docs are garbage collected.
//
// In current addIndexes(Directory[]), segment infos in S are added to
// T's "segmentInfos" upfront. Then segments in S are merged to T several
// at a time. Every merge is committed with T's "segmentInfos". So if
// a reader is opened on T while addIndexes() is going on, it could see
// an inconsistent index. AddIndexesNoOptimize() has a similar behaviour.
// 1 flush ram segments
flushRamSegments();
// 2 copy segment infos and find the highest level from dirs
int start = segmentInfos.size();
int startUpperBound = minMergeDocs;
boolean success = false;
startTransaction();
try { try {
for (int i = 0; i < dirs.length; i++) {
if (directory == dirs[i]) {
// cannot add this index: segments may be deleted in merge before added
throw new IllegalArgumentException("Cannot add this index to itself");
}
SegmentInfos sis = new SegmentInfos(); // read infos from dir try {
sis.read(dirs[i]); for (int i = 0; i < dirs.length; i++) {
for (int j = 0; j < sis.size(); j++) { if (directory == dirs[i]) {
SegmentInfo info = sis.info(j); // cannot add this index: segments may be deleted in merge before added
segmentInfos.addElement(info); // add each info throw new IllegalArgumentException("Cannot add this index to itself");
}
while (startUpperBound < info.docCount) { SegmentInfos sis = new SegmentInfos(); // read infos from dir
startUpperBound *= mergeFactor; // find the highest level from dirs sis.read(dirs[i]);
if (startUpperBound > maxMergeDocs) { for (int j = 0; j < sis.size(); j++) {
// upper bound cannot exceed maxMergeDocs SegmentInfo info = sis.info(j);
throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs"); segmentInfos.addElement(info); // add each info
while (startUpperBound < info.docCount) {
startUpperBound *= mergeFactor; // find the highest level from dirs
if (startUpperBound > maxMergeDocs) {
// upper bound cannot exceed maxMergeDocs
throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs");
}
} }
} }
} }
} catch (IllegalArgumentException e) {
for (int i = segmentInfos.size() - 1; i >= start; i--) {
segmentInfos.remove(i);
}
throw e;
} }
} catch (IllegalArgumentException e) {
for (int i = segmentInfos.size() - 1; i >= start; i--) { // 3 maybe merge segments starting from the highest level from dirs
segmentInfos.remove(i); maybeMergeSegments(startUpperBound);
// get the tail segments whose levels <= h
int segmentCount = segmentInfos.size();
int numTailSegments = 0;
while (numTailSegments < segmentCount
&& startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) {
numTailSegments++;
} }
throw e; if (numTailSegments == 0) {
} success = true;
// 3 maybe merge segments starting from the highest level from dirs
maybeMergeSegments(startUpperBound);
// get the tail segments whose levels <= h
int segmentCount = segmentInfos.size();
int numTailSegments = 0;
while (numTailSegments < segmentCount
&& startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) {
numTailSegments++;
}
if (numTailSegments == 0) {
return;
}
// 4 make sure invariants hold for the tail segments whose levels <= h
if (checkNonDecreasingLevels(segmentCount - numTailSegments)) {
// identify the segments from S to be copied (not merged in 3)
int numSegmentsToCopy = 0;
while (numSegmentsToCopy < segmentCount
&& directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) {
numSegmentsToCopy++;
}
if (numSegmentsToCopy == 0) {
return; return;
} }
// copy those segments from S // 4 make sure invariants hold for the tail segments whose levels <= h
for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) { if (checkNonDecreasingLevels(segmentCount - numTailSegments)) {
mergeSegments(segmentInfos, i, i + 1); // identify the segments from S to be copied (not merged in 3)
} int numSegmentsToCopy = 0;
if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) { while (numSegmentsToCopy < segmentCount
return; && directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) {
} numSegmentsToCopy++;
} }
if (numSegmentsToCopy == 0) {
success = true;
return;
}
// invariants do not hold, simply merge those segments // copy those segments from S
mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount); for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) {
mergeSegments(segmentInfos, i, i + 1);
}
if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) {
success = true;
return;
}
}
// maybe merge segments again if necessary // invariants do not hold, simply merge those segments
if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) { mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount);
maybeMergeSegments(startUpperBound * mergeFactor);
// maybe merge segments again if necessary
if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) {
maybeMergeSegments(startUpperBound * mergeFactor);
}
success = true;
} finally {
if (success) {
commitTransaction();
} else {
rollbackTransaction();
}
} }
} }
/** Merges the provided indexes into this index.
* <p>After this completes, the index is optimized. </p>
* <p>The provided IndexReaders are not closed.</p>
* <p>See {@link #addIndexes(Directory[])} for
* details on transactional semantics, temporary free
* space required in the Directory, and non-CFS segments
* on an Exception.</p>
*/
public synchronized void addIndexes(IndexReader[] readers)
throws IOException {
@ -761,26 +932,61 @@ public class IndexWriter {
for (int i = 0; i < readers.length; i++) // add new indexes
merger.add(readers[i]);
int docCount = merger.merge(); // merge 'em SegmentInfo info;
segmentInfos.setSize(0); // pop old infos & add new
SegmentInfo info = new SegmentInfo(mergedName, docCount, directory, false);
segmentInfos.addElement(info);
if(sReader != null)
sReader.close();
String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
segmentInfos.write(directory); // commit changes
boolean success = false;
startTransaction();
try {
int docCount = merger.merge(); // merge 'em
segmentInfos.setSize(0); // pop old infos & add new
info = new SegmentInfo(mergedName, docCount, directory, false);
segmentInfos.addElement(info);
commitPending = true;
if(sReader != null)
sReader.close();
success = true;
} finally {
if (!success) {
rollbackTransaction();
} else {
commitTransaction();
}
}
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
deleter.deleteSegments(segmentsToDelete); // delete now-unused segments deleter.deleteSegments(segmentsToDelete); // delete now-unused segments
if (useCompoundFile) { if (useCompoundFile) {
Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); success = false;
segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
info.setUseCompoundFile(true); Vector filesToDelete;
segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file
startTransaction();
try {
filesToDelete = merger.createCompoundFile(mergedName + ".cfs");
info.setUseCompoundFile(true);
commitPending = true;
success = true;
} finally {
if (!success) {
rollbackTransaction();
} else {
commitTransaction();
}
}
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
deleter.deleteFiles(filesToDelete); // delete now unused files of segment deleter.deleteFiles(filesToDelete); // delete now unused files of segment
@ -884,6 +1090,7 @@ public class IndexWriter {
// mergeFactor and/or maxBufferedDocs change(s)
while (numSegments >= mergeFactor) {
// merge the leftmost* mergeFactor segments
int docCount = mergeSegments(segmentInfos, minSegment, minSegment + mergeFactor);
numSegments -= mergeFactor;
@ -921,51 +1128,154 @@ public class IndexWriter {
SegmentMerger merger = new SegmentMerger(this, mergedName);
final Vector segmentsToDelete = new Vector();
for (int i = minSegment; i < end; i++) {
SegmentInfo si = sourceSegments.info(i);
if (infoStream != null)
infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
IndexReader reader = SegmentReader.get(si);
merger.add(reader);
if ((reader.directory() == this.directory) || // if we own the directory
(reader.directory() == this.ramDirectory))
segmentsToDelete.addElement(reader); // queue segment for deletion
}
int mergedDocCount = merger.merge();
if (infoStream != null) {
infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
}
SegmentInfo newSegment = new SegmentInfo(mergedName, mergedDocCount,
directory, false);
if (sourceSegments == ramSegmentInfos) {
sourceSegments.removeAllElements();
segmentInfos.addElement(newSegment);
} else {
for (int i = end-1; i > minSegment; i--) // remove old infos & add new
sourceSegments.remove(i);
segmentInfos.set(minSegment, newSegment);
}
// close readers before we attempt to delete now-obsolete segments
merger.closeReaders();
String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
segmentInfos.write(directory); // commit before deleting String nextSegmentsFileName = segmentInfos.getNextSegmentFileName();
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file SegmentInfo newSegment = null;
deleter.deleteSegments(segmentsToDelete); // delete now-unused segments
int mergedDocCount;
// This is try/finally to make sure merger's readers are closed:
try {
for (int i = minSegment; i < end; i++) {
SegmentInfo si = sourceSegments.info(i);
if (infoStream != null)
infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
IndexReader reader = SegmentReader.get(si);
merger.add(reader);
if ((reader.directory() == this.directory) || // if we own the directory
(reader.directory() == this.ramDirectory))
segmentsToDelete.addElement(reader); // queue segment for deletion
}
SegmentInfos rollback = null;
boolean success = false;
// This is try/finally to rollback our internal state
// if we hit exception when doing the merge:
try {
mergedDocCount = merger.merge();
if (infoStream != null) {
infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
}
newSegment = new SegmentInfo(mergedName, mergedDocCount,
directory, false);
if (sourceSegments == ramSegmentInfos) {
segmentInfos.addElement(newSegment);
} else {
if (!inTransaction) {
// Now save the SegmentInfo instances that
// we are replacing:
rollback = (SegmentInfos) segmentInfos.clone();
}
for (int i = end-1; i > minSegment; i--) // remove old infos & add new
sourceSegments.remove(i);
segmentInfos.set(minSegment, newSegment);
}
if (!inTransaction) {
segmentInfos.write(directory); // commit before deleting
} else {
commitPending = true;
}
success = true;
} finally {
if (success) {
// The non-ram-segments case is already committed
// (above), so all that remains for the ram segments case
// is to clear the ram segments:
if (sourceSegments == ramSegmentInfos) {
ramSegmentInfos.removeAllElements();
}
} else if (!inTransaction) {
// Must rollback so our state matches index:
if (sourceSegments == ramSegmentInfos) {
// Simple case: newSegment may or may not have
// been added to the end of our segment infos,
// so just check & remove if so:
if (newSegment != null &&
segmentInfos.size() > 0 &&
segmentInfos.info(segmentInfos.size()-1) == newSegment) {
segmentInfos.remove(segmentInfos.size()-1);
}
} else if (rollback != null) {
// Rollback the individual SegmentInfo
// instances, but keep original SegmentInfos
// instance (so we don't try to write again the
// same segments_N file -- write once):
segmentInfos.clear();
segmentInfos.addAll(rollback);
}
// Delete any partially created files:
deleter.deleteFile(nextSegmentsFileName);
deleter.findDeletableFiles();
deleter.deleteFiles();
}
}
} finally {
// close readers before we attempt to delete now-obsolete segments
merger.closeReaders();
}
if (!inTransaction) {
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
deleter.deleteSegments(segmentsToDelete); // delete now-unused segments
} else {
deleter.addPendingFile(segmentsInfosFileName); // delete old segments_N file
deleter.deleteSegments(segmentsToDelete, protectedSegments); // delete now-unused segments
}
if (useCompoundFile) { if (useCompoundFile) {
Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs");
segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); segmentsInfosFileName = nextSegmentsFileName;
newSegment.setUseCompoundFile(true); nextSegmentsFileName = segmentInfos.getNextSegmentFileName();
segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file Vector filesToDelete;
boolean success = false;
try {
filesToDelete = merger.createCompoundFile(mergedName + ".cfs");
newSegment.setUseCompoundFile(true);
if (!inTransaction) {
segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file
}
success = true;
} finally {
if (!success && !inTransaction) {
// Must rollback:
newSegment.setUseCompoundFile(false);
deleter.deleteFile(mergedName + ".cfs");
deleter.deleteFile(nextSegmentsFileName);
}
}
if (!inTransaction) {
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
}
// We can delete these segments whether or not we are
// in a transaction because we had just written them
// above so they can't need protection by the
// transaction:
deleter.deleteFiles(filesToDelete); // delete now-unused segments deleter.deleteFiles(filesToDelete); // delete now-unused segments
} }

View File

@ -230,6 +230,20 @@ public class MultiReader extends IndexReader {
subReaders[i].commit();
}
void startCommit() {
super.startCommit();
for (int i = 0; i < subReaders.length; i++) {
subReaders[i].startCommit();
}
}
void rollbackCommit() {
super.rollbackCommit();
for (int i = 0; i < subReaders.length; i++) {
subReaders[i].rollbackCommit();
}
}
protected synchronized void doClose() throws IOException {
for (int i = 0; i < subReaders.length; i++)
subReaders[i].close();

View File

@ -62,6 +62,23 @@ final class SegmentInfo {
preLockless = false;
}
/**
* Copy everything from src SegmentInfo into our instance.
*/
void reset(SegmentInfo src) {
name = src.name;
docCount = src.docCount;
dir = src.dir;
preLockless = src.preLockless;
delGen = src.delGen;
if (src.normGen == null) {
normGen = null;
} else {
normGen = new long[src.normGen.length];
System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length);
}
isCompoundFile = src.isCompoundFile;
}
/**
* Construct a new SegmentInfo instance by reading a
@ -151,6 +168,17 @@ final class SegmentInfo {
delGen = -1;
}
public Object clone () {
SegmentInfo si = new SegmentInfo(name, docCount, dir);
si.isCompoundFile = isCompoundFile;
si.delGen = delGen;
si.preLockless = preLockless;
if (normGen != null) {
si.normGen = (long[]) normGen.clone();
}
return si;
}
String getDelFileName() {
if (delGen == -1) {
// In this case we know there is no deletion filename

View File

@ -50,7 +50,11 @@ public final class SegmentInfos extends Vector {
* starting with the current time in milliseconds forces to create unique version numbers.
*/
private long version = System.currentTimeMillis();
private long generation = 0; // generation of the "segments_N" file we read
private long generation = 0; // generation of the "segments_N" for the next commit
private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read
// or wrote; this is normally the same as generation except if
// there was an IOException that had interrupted a commit
/**
* If non-null, information about loading segments_N files
@ -132,12 +136,28 @@ public final class SegmentInfos extends Vector {
}
/**
* Get the segment_N filename in use by this segment infos. * Get the segments_N filename in use by this segment infos.
*/
public String getCurrentSegmentFileName() {
return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
"",
generation); lastGeneration);
}
/**
* Get the next segments_N filename that will be written.
*/
public String getNextSegmentFileName() {
long nextGeneration;
if (generation == -1) {
nextGeneration = 1;
} else {
nextGeneration = generation+1;
}
return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
"",
nextGeneration);
}
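The generation / lastGeneration split above relies on the lockless-commits naming scheme, where a positive generation N maps to "segments_" plus N written in radix 36; this matches IndexFileNames.fileNameFromGeneration as used here, and the concrete numbers below are only an example:

class SegmentsFileNameExample {
  public static void main(String[] args) {
    long lastGeneration = 11;   // the commit actually on disk: "segments_b"
    String current = "segments_" + Long.toString(lastGeneration, Character.MAX_RADIX);
    String next = "segments_" + Long.toString(lastGeneration + 1, Character.MAX_RADIX);
    // Prints "segments_b -> segments_c"; after a failed write, lastGeneration is
    // not advanced, so getCurrentSegmentFileName() keeps pointing at segments_b.
    System.out.println(current + " -> " + next);
  }
}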
/**
@ -158,6 +178,7 @@ public final class SegmentInfos extends Vector {
generation = Long.parseLong(segmentFileName.substring(1+IndexFileNames.SEGMENTS.length()),
Character.MAX_RADIX);
}
lastGeneration = generation;
try {
int format = input.readInt();
@ -199,7 +220,7 @@ public final class SegmentInfos extends Vector {
*/
public final void read(Directory directory) throws IOException {
generation = -1; generation = lastGeneration = -1;
new FindSegmentsFile(directory) {
@ -212,6 +233,8 @@ public final class SegmentInfos extends Vector {
public final void write(Directory directory) throws IOException {
String segmentFileName = getNextSegmentFileName();
// Always advance the generation on write:
if (generation == -1) {
generation = 1;
@ -219,7 +242,6 @@ public final class SegmentInfos extends Vector {
generation++;
}
String segmentFileName = getCurrentSegmentFileName();
IndexOutput output = directory.createOutput(segmentFileName);
try {
@ -229,8 +251,7 @@ public final class SegmentInfos extends Vector {
output.writeInt(counter); // write counter
output.writeInt(size()); // write infos
for (int i = 0; i < size(); i++) {
SegmentInfo si = info(i); info(i).write(output);
si.write(output);
}
}
finally {
@ -247,6 +268,21 @@ public final class SegmentInfos extends Vector {
// It's OK if we fail to write this file since it's
// used only as one of the retry fallbacks.
}
lastGeneration = generation;
}
/**
* Returns a copy of this instance, also copying each
* SegmentInfo.
*/
public Object clone() {
SegmentInfos sis = (SegmentInfos) super.clone();
for(int i=0;i<sis.size();i++) {
sis.setElementAt(((SegmentInfo) sis.elementAt(i)).clone(), i);
}
return sis;
}
/**
@ -496,7 +532,7 @@ public final class SegmentInfos extends Vector {
if (genLookaheadCount < defaultGenLookaheadCount) {
gen++;
genLookaheadCount++;
message("look ahead incremenent gen to " + gen); message("look ahead increment gen to " + gen);
}
}

View File

@ -47,6 +47,10 @@ class SegmentReader extends IndexReader {
private boolean normsDirty = false;
private boolean undeleteAll = false;
private boolean rollbackDeletedDocsDirty = false;
private boolean rollbackNormsDirty = false;
private boolean rollbackUndeleteAll = false;
IndexInput freqStream;
IndexInput proxStream;
@ -64,6 +68,7 @@ class SegmentReader extends IndexReader {
private byte[] bytes;
private boolean dirty;
private int number;
private boolean rollbackDirty;
private void reWrite(SegmentInfo si) throws IOException {
// NOTE: norms are re-written in regular directory, not cfs
@ -545,4 +550,39 @@ class SegmentReader extends IndexReader {
return termVectorsReader.get(docNumber);
}
/**
* Return the name of the segment this reader is reading.
*/
String getSegmentName() {
return segment;
}
void setSegmentInfo(SegmentInfo info) {
si = info;
}
void startCommit() {
super.startCommit();
rollbackDeletedDocsDirty = deletedDocsDirty;
rollbackNormsDirty = normsDirty;
rollbackUndeleteAll = undeleteAll;
Enumeration values = norms.elements();
while (values.hasMoreElements()) {
Norm norm = (Norm) values.nextElement();
norm.rollbackDirty = norm.dirty;
}
}
void rollbackCommit() {
super.rollbackCommit();
deletedDocsDirty = rollbackDeletedDocsDirty;
normsDirty = rollbackNormsDirty;
undeleteAll = rollbackUndeleteAll;
Enumeration values = norms.elements();
while (values.hasMoreElements()) {
Norm norm = (Norm) values.nextElement();
norm.dirty = norm.rollbackDirty;
}
}
}

View File

@ -34,13 +34,13 @@ import java.util.Set;
*
* @version $Id$
*/
public final class RAMDirectory extends Directory implements Serializable { public class RAMDirectory extends Directory implements Serializable {
private static final long serialVersionUID = 1l;
private HashMap fileMap = new HashMap(); HashMap fileMap = new HashMap();
private Set fileNames = fileMap.keySet(); private Set fileNames = fileMap.keySet();
private Collection files = fileMap.values(); Collection files = fileMap.values();
long sizeInBytes = 0;
// *****
@ -178,20 +178,13 @@ public final class RAMDirectory extends Directory implements Serializable {
return file.getLength();
}
/** Return total size in bytes of all files in this directory */ /** Return total size in bytes of all files in this
* directory. This is currently quantized to
* BufferedIndexOutput.BUFFER_SIZE. */
public synchronized final long sizeInBytes() {
return sizeInBytes;
}
/** Provided for testing purposes. Use sizeInBytes() instead. */
public synchronized final long getRecomputedSizeInBytes() {
long size = 0;
Iterator it = files.iterator();
while (it.hasNext())
size += ((RAMFile) it.next()).getSizeInBytes();
return size;
}
/** Removes an existing file in the directory.
* @throws IOException if the file does not exist
*/
@ -222,7 +215,7 @@ public final class RAMDirectory extends Directory implements Serializable {
}
/** Creates a new, empty file in the directory with the given name. Returns a stream writing this file. */
public final IndexOutput createOutput(String name) { public IndexOutput createOutput(String name) {
RAMFile file = new RAMFile(this);
synchronized (this) {
RAMFile existing = (RAMFile)fileMap.get(name);

View File

@ -66,7 +66,7 @@ public class RAMOutputStream extends BufferedIndexOutput {
file.setLength(0);
}
public void flushBuffer(byte[] src, int len) { public void flushBuffer(byte[] src, int len) throws IOException {
byte[] buffer;
int bufferPos = 0;
while (bufferPos != len) {

View File

@ -34,7 +34,12 @@ public class TestIndexFileDeleter extends TestCase
Directory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
for(int i=0;i<35;i++) { int i;
for(i=0;i<35;i++) {
addDoc(writer, i);
}
writer.setUseCompoundFile(false);
for(;i<45;i++) {
addDoc(writer, i);
}
writer.close();
@ -68,7 +73,7 @@ public class TestIndexFileDeleter extends TestCase
CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs");
FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
int contentFieldIndex = -1;
for(int i=0;i<fieldInfos.size();i++) { for(i=0;i<fieldInfos.size();i++) {
FieldInfo fi = fieldInfos.fieldInfo(i);
if (fi.name.equals("content")) {
contentFieldIndex = i;
@ -125,6 +130,9 @@ public class TestIndexFileDeleter extends TestCase
copyFile(dir, "segments_a", "segments"); copyFile(dir, "segments_a", "segments");
copyFile(dir, "segments_a", "segments_2"); copyFile(dir, "segments_a", "segments_2");
// Create a bogus cfs file shadowing a non-cfs segment:
copyFile(dir, "_2.cfs", "_3.cfs");
String[] filesPre = dir.list();
// Open & close a writer: it should delete the above 4

View File

@ -30,11 +30,18 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.TermQuery;
import java.util.Collection;
import java.util.Arrays;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.io.File;
import org.apache.lucene.store.MockRAMDirectory;
public class TestIndexReader extends TestCase
{
/** Main for running test case by itself. */
@ -547,7 +554,213 @@ public class TestIndexReader extends TestCase
public void testDeleteReaderReaderConflictOptimized() throws IOException{
deleteReaderReaderConflict(true);
}
/**
* Make sure if reader tries to commit but hits disk
* full that reader remains consistent and usable.
*/
public void testDiskFull() throws IOException {
boolean debug = false;
Term searchTerm = new Term("content", "aaa");
int START_COUNT = 157;
int END_COUNT = 144;
// First build up a starting index:
RAMDirectory startDir = new RAMDirectory();
IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), true);
for(int i=0;i<157;i++) {
Document d = new Document();
d.add(new Field("id", Integer.toString(i), Field.Store.YES, Field.Index.UN_TOKENIZED));
d.add(new Field("content", "aaa " + i, Field.Store.NO, Field.Index.TOKENIZED));
writer.addDocument(d);
}
writer.close();
long diskUsage = startDir.sizeInBytes();
long diskFree = diskUsage+100;
IOException err = null;
boolean done = false;
// Iterate w/ ever increasing free disk space:
while(!done) {
MockRAMDirectory dir = new MockRAMDirectory(startDir);
IndexReader reader = IndexReader.open(dir);
// For each disk size, first try to commit against
// dir that will hit random IOExceptions & disk
// full; after, give it infinite disk space & turn
// off random IOExceptions & retry w/ same reader:
boolean success = false;
for(int x=0;x<2;x++) {
double rate = 0.05;
double diskRatio = ((double) diskFree)/diskUsage;
long thisDiskFree;
String testName;
if (0 == x) {
thisDiskFree = diskFree;
if (diskRatio >= 2.0) {
rate /= 2;
}
if (diskRatio >= 4.0) {
rate /= 2;
}
if (diskRatio >= 6.0) {
rate = 0.0;
}
if (debug) {
System.out.println("\ncycle: " + diskFree + " bytes");
}
testName = "disk full during reader.close() @ " + thisDiskFree + " bytes";
} else {
thisDiskFree = 0;
rate = 0.0;
if (debug) {
System.out.println("\ncycle: same writer: unlimited disk space");
}
testName = "reader re-use after disk full";
}
dir.setMaxSizeInBytes(thisDiskFree);
dir.setRandomIOExceptionRate(rate, diskFree);
try {
if (0 == x) {
int docId = 12;
for(int i=0;i<13;i++) {
reader.deleteDocument(docId);
reader.setNorm(docId, "contents", (float) 2.0);
docId += 12;
}
}
reader.close();
success = true;
if (0 == x) {
done = true;
}
} catch (IOException e) {
if (debug) {
System.out.println(" hit IOException: " + e);
}
err = e;
if (1 == x) {
e.printStackTrace();
fail(testName + " hit IOException after disk space was freed up");
}
}
// Whether we succeeded or failed, check that all
// un-referenced files were in fact deleted (ie,
// we did not create garbage). Just create a
// new IndexFileDeleter, have it delete
// unreferenced files, then verify that in fact
// no files were deleted:
String[] startFiles = dir.list();
SegmentInfos infos = new SegmentInfos();
infos.read(dir);
IndexFileDeleter d = new IndexFileDeleter(infos, dir);
d.findDeletableFiles();
d.deleteFiles();
String[] endFiles = dir.list();
Arrays.sort(startFiles);
Arrays.sort(endFiles);
//for(int i=0;i<startFiles.length;i++) {
// System.out.println(" startFiles: " + i + ": " + startFiles[i]);
//}
if (!Arrays.equals(startFiles, endFiles)) {
String successStr;
if (success) {
successStr = "success";
} else {
successStr = "IOException";
err.printStackTrace();
}
fail("reader.close() failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes): before delete:\n " + arrayToString(startFiles) + "\n after delete:\n " + arrayToString(endFiles));
}
// Finally, verify index is not corrupt, and, if
// we succeeded, we see all docs changed, and if
// we failed, we see either all docs or no docs
// changed (transactional semantics):
IndexReader newReader = null;
try {
newReader = IndexReader.open(dir);
} catch (IOException e) {
e.printStackTrace();
fail(testName + ":exception when creating IndexReader after disk full during close: " + e);
}
/*
int result = newReader.docFreq(searchTerm);
if (success) {
if (result != END_COUNT) {
fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT);
}
} else {
// On hitting exception we still may have added
// all docs:
if (result != START_COUNT && result != END_COUNT) {
err.printStackTrace();
fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
}
}
*/
IndexSearcher searcher = new IndexSearcher(newReader);
Hits hits = null;
try {
hits = searcher.search(new TermQuery(searchTerm));
} catch (IOException e) {
e.printStackTrace();
fail(testName + ": exception when searching: " + e);
}
int result2 = hits.length();
if (success) {
if (result2 != END_COUNT) {
fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + END_COUNT);
}
} else {
// On hitting exception we still may have added
// all docs:
if (result2 != START_COUNT && result2 != END_COUNT) {
err.printStackTrace();
fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT);
}
}
searcher.close();
newReader.close();
if (result2 == END_COUNT) {
break;
}
}
dir.close();
// Try again with 10 more bytes of free space:
diskFree += 10;
}
}
private String arrayToString(String[] l) {
String s = "";
for(int i=0;i<l.length;i++) {
if (i > 0) {
s += "\n ";
}
s += l[i];
}
return s;
}
private void deleteReaderReaderConflict(boolean optimize) throws IOException
{
Directory dir = getDirectory(true);
@ -697,4 +910,6 @@ public class TestIndexReader extends TestCase
}
dir.delete();
}
}

View File

@ -2,6 +2,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.io.File;
import java.util.Arrays;
import junit.framework.TestCase;
@ -10,12 +11,16 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MockRAMDirectory;
/**
* @author goller
@ -88,6 +93,350 @@ public class TestIndexWriter extends TestCase
writer.addDocument(doc);
}
private void addDocWithIndex(IndexWriter writer, int index) throws IOException
{
Document doc = new Document();
doc.add(new Field("content", "aaa " + index, Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("id", "" + index, Field.Store.YES, Field.Index.TOKENIZED));
writer.addDocument(doc);
}
/*
Test: make sure when we run out of disk space or hit
random IOExceptions in any of the addIndexes(*) calls
that 1) index is not corrupt (searcher can open/search
it) and 2) transactional semantics are followed:
either all or none of the incoming documents were in
fact added.
*/
public void testAddIndexOnDiskFull() throws IOException
{
int START_COUNT = 57;
int NUM_DIR = 50;
int END_COUNT = START_COUNT + NUM_DIR*25;
boolean debug = false;
// Build up a bunch of dirs that have indexes which we
// will then merge together by calling addIndexes(*):
Directory[] dirs = new Directory[NUM_DIR];
long inputDiskUsage = 0;
for(int i=0;i<NUM_DIR;i++) {
dirs[i] = new RAMDirectory();
IndexWriter writer = new IndexWriter(dirs[i], new WhitespaceAnalyzer(), true);
for(int j=0;j<25;j++) {
addDocWithIndex(writer, 25*i+j);
}
writer.close();
String[] files = dirs[i].list();
for(int j=0;j<files.length;j++) {
inputDiskUsage += dirs[i].fileLength(files[j]);
}
}
// Now, build a starting index that has START_COUNT docs. We
// will then try to addIndexes into a copy of this:
RAMDirectory startDir = new RAMDirectory();
IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), true);
for(int j=0;j<START_COUNT;j++) {
addDocWithIndex(writer, j);
}
writer.close();
// Make sure starting index seems to be working properly:
Term searchTerm = new Term("content", "aaa");
IndexReader reader = IndexReader.open(startDir);
assertEquals("first docFreq", 57, reader.docFreq(searchTerm));
IndexSearcher searcher = new IndexSearcher(reader);
Hits hits = searcher.search(new TermQuery(searchTerm));
assertEquals("first number of hits", 57, hits.length());
searcher.close();
reader.close();
// Iterate with larger and larger amounts of free
// disk space. With little free disk space,
// addIndexes will certainly run out of space &
// fail. Verify that when this happens, index is
// not corrupt and index in fact has added no
// documents. Then, we increase disk space by 1000
// bytes each iteration. At some point there is
// enough free disk space and addIndexes should
// succeed and index should show all documents were
// added.
// String[] files = startDir.list();
long diskUsage = startDir.sizeInBytes();
long startDiskUsage = 0;
String[] files = startDir.list();
for(int i=0;i<files.length;i++) {
startDiskUsage += startDir.fileLength(files[i]);
}
for(int method=0;method<3;method++) {
// Start with 100 bytes more than we are currently using:
long diskFree = diskUsage+100;
boolean success = false;
boolean done = false;
String methodName;
if (0 == method) {
methodName = "addIndexes(Directory[])";
} else if (1 == method) {
methodName = "addIndexes(IndexReader[])";
} else {
methodName = "addIndexesNoOptimize(Directory[])";
}
String testName = "disk full test for method " + methodName + " with disk full at " + diskFree + " bytes";
int cycleCount = 0;
while(!done) {
cycleCount++;
// Make a new dir that will enforce disk usage:
MockRAMDirectory dir = new MockRAMDirectory(startDir);
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
IOException err = null;
for(int x=0;x<2;x++) {
// Two loops: first time, limit disk space &
// throw random IOExceptions; second time, no
// disk space limit:
double rate = 0.05;
double diskRatio = ((double) diskFree)/diskUsage;
long thisDiskFree;
if (0 == x) {
thisDiskFree = diskFree;
if (diskRatio >= 2.0) {
rate /= 2;
}
if (diskRatio >= 4.0) {
rate /= 2;
}
if (diskRatio >= 6.0) {
rate = 0.0;
}
if (debug) {
System.out.println("\ncycle: " + methodName + ": " + diskFree + " bytes");
}
} else {
thisDiskFree = 0;
rate = 0.0;
if (debug) {
System.out.println("\ncycle: " + methodName + ", same writer: unlimited disk space");
}
}
dir.setMaxSizeInBytes(thisDiskFree);
dir.setRandomIOExceptionRate(rate, diskFree);
try {
if (0 == method) {
writer.addIndexes(dirs);
} else if (1 == method) {
IndexReader readers[] = new IndexReader[dirs.length];
for(int i=0;i<dirs.length;i++) {
readers[i] = IndexReader.open(dirs[i]);
}
try {
writer.addIndexes(readers);
} finally {
for(int i=0;i<dirs.length;i++) {
readers[i].close();
}
}
} else {
writer.addIndexesNoOptimize(dirs);
}
success = true;
if (debug) {
System.out.println(" success!");
}
if (0 == x) {
done = true;
}
} catch (IOException e) {
success = false;
err = e;
if (debug) {
System.out.println(" hit IOException: " + e);
}
if (1 == x) {
e.printStackTrace();
fail(methodName + " hit IOException after disk space was freed up");
}
}
// Whether we succeeded or failed, check that all
// un-referenced files were in fact deleted (ie,
// we did not create garbage). Just create a
// new IndexFileDeleter, have it delete
// unreferenced files, then verify that in fact
// no files were deleted:
String[] startFiles = dir.list();
SegmentInfos infos = new SegmentInfos();
infos.read(dir);
IndexFileDeleter d = new IndexFileDeleter(infos, dir);
d.findDeletableFiles();
d.deleteFiles();
String[] endFiles = dir.list();
Arrays.sort(startFiles);
Arrays.sort(endFiles);
/*
for(int i=0;i<startFiles.length;i++) {
System.out.println(" " + i + ": " + startFiles[i]);
}
*/
if (!Arrays.equals(startFiles, endFiles)) {
String successStr;
if (success) {
successStr = "success";
} else {
successStr = "IOException";
err.printStackTrace();
}
fail(methodName + " failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes): before delete:\n " + arrayToString(startFiles) + "\n after delete:\n " + arrayToString(endFiles));
}
if (debug) {
System.out.println(" now test readers");
}
// Finally, verify index is not corrupt, and, if
// we succeeded, we see all docs added, and if we
// failed, we see either all docs or no docs added
// (transactional semantics):
try {
reader = IndexReader.open(dir);
} catch (IOException e) {
e.printStackTrace();
fail(testName + ": exception when creating IndexReader: " + e);
}
int result = reader.docFreq(searchTerm);
if (success) {
if (result != END_COUNT) {
fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT);
}
} else {
// On hitting exception we still may have added
// all docs:
if (result != START_COUNT && result != END_COUNT) {
err.printStackTrace();
fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
}
}
searcher = new IndexSearcher(reader);
try {
hits = searcher.search(new TermQuery(searchTerm));
} catch (IOException e) {
e.printStackTrace();
fail(testName + ": exception when searching: " + e);
}
int result2 = hits.length();
if (success) {
if (result2 != result) {
fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
}
} else {
// On hitting exception we still may have added
// all docs:
if (result2 != result) {
err.printStackTrace();
fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
}
}
searcher.close();
reader.close();
if (debug) {
System.out.println(" count is " + result);
}
if (result == END_COUNT) {
break;
}
}
// Javadocs state that temp free Directory space
// required is at most 2X total input size of
// indices so let's make sure:
assertTrue("max free Directory space required exceeded 1X the total input index sizes during " + methodName +
": max temp usage = " + (dir.getMaxUsedSizeInBytes()-startDiskUsage) + " bytes; " +
"starting disk usage = " + startDiskUsage + " bytes; " +
"input index disk usage = " + inputDiskUsage + " bytes",
(dir.getMaxUsedSizeInBytes()-startDiskUsage) < 2*(startDiskUsage + inputDiskUsage));
writer.close();
dir.close();
// Try again with 1000 more bytes of free space:
diskFree += 1000;
}
}
startDir.close();
}
/**
* Make sure optimize doesn't use any more than 1X
* starting index size as its temporary free space
* required.
*/
public void testOptimizeTempSpaceUsage() throws IOException {
MockRAMDirectory dir = new MockRAMDirectory();
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
for(int j=0;j<500;j++) {
addDocWithIndex(writer, j);
}
writer.close();
long startDiskUsage = 0;
String[] files = dir.list();
for(int i=0;i<files.length;i++) {
startDiskUsage += dir.fileLength(files[i]);
}
dir.resetMaxUsedSizeInBytes();
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
writer.optimize();
writer.close();
long maxDiskUsage = dir.getMaxUsedSizeInBytes();
assertTrue("optimized used too much temporary space: starting usage was " + startDiskUsage + " bytes; max temp usage was " + maxDiskUsage + " but should have been " + (2*startDiskUsage) + " (= 2X starting usage)",
maxDiskUsage <= 2*startDiskUsage);
}
private String arrayToString(String[] l) {
String s = "";
for(int i=0;i<l.length;i++) {
if (i > 0) {
s += "\n ";
}
s += l[i];
}
return s;
}
// Make sure we can open an index for create even when a
// reader holds it open (this fails pre lock-less
// commits on windows):
@ -276,3 +625,5 @@ public class TestIndexWriter extends TestCase
dir.delete();
}
}
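The unreferenced-file check that the new disk-full test runs after every addIndexes attempt can be read in isolation as the sketch below. This is a hypothetical helper, not part of the patch: the class and method names are invented, and it is assumed to live in the org.apache.lucene.index package (as the test does) so it can reach IndexFileDeleter.

package org.apache.lucene.index;

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.store.Directory;

// Hypothetical helper, not part of the patch: the same "no garbage files"
// check performed inline by testAddIndexOnDiskFull.
class UnreferencedFileCheck {

  // Throws if a fresh IndexFileDeleter, run over the committed SegmentInfos,
  // would delete anything -- i.e. if the directory holds files that no
  // longer belong to the index.
  static void assertNoUnreferencedFiles(Directory dir) throws IOException {
    String[] before = dir.list();

    SegmentInfos infos = new SegmentInfos();
    infos.read(dir);                              // current commit point
    IndexFileDeleter deleter = new IndexFileDeleter(infos, dir);
    deleter.findDeletableFiles();                 // scan for unreferenced files
    deleter.deleteFiles();                        // delete whatever was found

    String[] after = dir.list();
    Arrays.sort(before);
    Arrays.sort(after);
    if (!Arrays.equals(before, after)) {
      throw new IllegalStateException("directory contained unreferenced files");
    }
  }
}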


@ -33,6 +33,8 @@ import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.English;
import org.apache.lucene.store.MockRAMDirectory;
/**
* JUnit testcase to test RAMDirectory. RAMDirectory itself is used in many testcases,
* but not one of them uses an different constructor other than the default constructor.
@ -70,7 +72,7 @@ public class TestRAMDirectory extends TestCase {
public void testRAMDirectory () throws IOException {
Directory dir = FSDirectory.getDirectory(indexDir, false);
RAMDirectory ramDir = new RAMDirectory(dir); MockRAMDirectory ramDir = new MockRAMDirectory(dir);
// close the underlaying directory
dir.close();
@ -98,7 +100,7 @@ public class TestRAMDirectory extends TestCase {
public void testRAMDirectoryFile () throws IOException {
RAMDirectory ramDir = new RAMDirectory(indexDir); MockRAMDirectory ramDir = new MockRAMDirectory(indexDir);
// Check size
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
@ -123,7 +125,7 @@ public class TestRAMDirectory extends TestCase {
public void testRAMDirectoryString () throws IOException {
RAMDirectory ramDir = new RAMDirectory(indexDir.getCanonicalPath()); MockRAMDirectory ramDir = new MockRAMDirectory(indexDir.getCanonicalPath());
// Check size
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
@ -151,7 +153,7 @@ public class TestRAMDirectory extends TestCase {
public void testRAMDirectorySize() throws IOException, InterruptedException {
final RAMDirectory ramDir = new RAMDirectory(indexDir.getCanonicalPath()); final MockRAMDirectory ramDir = new MockRAMDirectory(indexDir.getCanonicalPath());
final IndexWriter writer = new IndexWriter(ramDir, new WhitespaceAnalyzer(), false);
writer.optimize();


@ -0,0 +1,130 @@
package org.apache.lucene.store;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.File;
import java.util.Iterator;
import java.util.Random;
/**
* This is a subclass of RAMDirectory that adds methods
* intended to be used only by unit tests.
* @version $Id: RAMDirectory.java 437897 2006-08-29 01:13:10Z yonik $
*/
public class MockRAMDirectory extends RAMDirectory {
long maxSize;
// Max actual bytes used. This is set by MockRAMOutputStream:
long maxUsedSize;
double randomIOExceptionRate;
Random randomState;
public MockRAMDirectory() throws IOException {
super();
}
public MockRAMDirectory(String dir) throws IOException {
super(dir);
}
public MockRAMDirectory(Directory dir) throws IOException {
super(dir);
}
public MockRAMDirectory(File dir) throws IOException {
super(dir);
}
public void setMaxSizeInBytes(long maxSize) {
this.maxSize = maxSize;
}
public long getMaxSizeInBytes() {
return this.maxSize;
}
/**
* Returns the peak actual storage used (bytes) in this
* directory.
*/
public long getMaxUsedSizeInBytes() {
return this.maxUsedSize;
}
public void resetMaxUsedSizeInBytes() {
this.maxUsedSize = getRecomputedActualSizeInBytes();
}
/**
* If 0.0, no exceptions will be thrown. Else this should
* be a double 0.0 - 1.0. We will randomly throw an
* IOException on the first write to an OutputStream based
* on this probability.
*/
public void setRandomIOExceptionRate(double rate, long seed) {
randomIOExceptionRate = rate;
// seed so we have deterministic behaviour:
randomState = new Random(seed);
}
public double getRandomIOExceptionRate() {
return randomIOExceptionRate;
}
void maybeThrowIOException() throws IOException {
if (randomIOExceptionRate > 0.0) {
int number = Math.abs(randomState.nextInt() % 1000);
if (number < randomIOExceptionRate*1000) {
throw new IOException("a random IOException");
}
}
}
public IndexOutput createOutput(String name) {
RAMFile file = new RAMFile(this);
synchronized (this) {
RAMFile existing = (RAMFile)fileMap.get(name);
if (existing!=null) {
sizeInBytes -= existing.sizeInBytes;
existing.directory = null;
}
fileMap.put(name, file);
}
return new MockRAMOutputStream(this, file);
}
/** Provided for testing purposes. Use sizeInBytes() instead. */
public synchronized final long getRecomputedSizeInBytes() {
long size = 0;
Iterator it = files.iterator();
while (it.hasNext())
size += ((RAMFile) it.next()).getSizeInBytes();
return size;
}
/** Like getRecomputedSizeInBytes(), but uses actual file
* lengths rather than buffer allocations (which are
* quantized up to the nearest
* BufferedIndexOutput.BUFFER_SIZE, currently 1024 bytes).
*/
final long getRecomputedActualSizeInBytes() {
long size = 0;
Iterator it = files.iterator();
while (it.hasNext())
size += ((RAMFile) it.next()).length;
return size;
}
}
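For orientation, this is roughly how testAddIndexOnDiskFull above drives MockRAMDirectory. The snippet below is a trimmed, hypothetical usage sketch, not part of the patch: the class name, the +100 byte cap, and the fixed seed are illustrative choices.

import java.io.IOException;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.store.RAMDirectory;

// Hypothetical usage sketch, not part of the patch.
class MockRAMDirectoryUsage {

  static void tryAddIndexesWithAlmostNoDiskSpace(RAMDirectory startDir, Directory[] toAdd)
      throws IOException {
    // Copy the starting index into a mock directory whose "disk" is barely
    // bigger than the index itself, and inject occasional IOExceptions:
    MockRAMDirectory dir = new MockRAMDirectory(startDir);
    dir.setMaxSizeInBytes(startDir.sizeInBytes() + 100);  // fake disk-full threshold
    dir.setRandomIOExceptionRate(0.05, 42);               // 5% chance, fixed seed

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
    try {
      writer.addIndexes(toAdd);   // expected to hit fake disk full or a random IOException
    } catch (IOException e) {
      // With transactional semantics the index must still open cleanly and
      // contain either all of toAdd's documents or none of them.
    }
    writer.close();

    System.out.println("peak bytes used: " + dir.getMaxUsedSizeInBytes());
    dir.close();
  }
}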


@ -0,0 +1,83 @@
package org.apache.lucene.store;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Iterator;
/**
* Used by MockRAMDirectory to create an output stream that
* will throw an IOException on fake disk full, track max
* disk space actually used, and maybe throw random
* IOExceptions.
*/
public class MockRAMOutputStream extends RAMOutputStream {
private MockRAMDirectory dir;
private boolean first=true;
/** Construct an empty output buffer. */
public MockRAMOutputStream(MockRAMDirectory dir, RAMFile f) {
super(f);
this.dir = dir;
}
public void close() throws IOException {
super.close();
// Now compute actual disk usage & track the maxUsedSize
// in the MockRAMDirectory:
long size = dir.getRecomputedActualSizeInBytes();
if (size > dir.maxUsedSize) {
dir.maxUsedSize = size;
}
}
public void flushBuffer(byte[] src, int len) throws IOException {
long freeSpace = dir.maxSize - dir.sizeInBytes();
long realUsage = 0;
// Enforce disk full:
if (dir.maxSize != 0 && freeSpace <= len) {
// Compute the real disk free. This will greatly slow
// down our test but makes it more accurate:
realUsage = dir.getRecomputedActualSizeInBytes();
freeSpace = dir.maxSize - realUsage;
}
if (dir.maxSize != 0 && freeSpace <= len) {
if (freeSpace > 0 && freeSpace < len) {
realUsage += freeSpace;
super.flushBuffer(src, (int) freeSpace);
}
if (realUsage > dir.maxUsedSize) {
dir.maxUsedSize = realUsage;
}
throw new IOException("fake disk full at " + dir.sizeInBytes() + " bytes");
} else {
super.flushBuffer(src, len);
}
if (first) {
// Maybe throw random exception; only do this on first
// write to a new file:
first = false;
dir.maybeThrowIOException();
}
}
}
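To see the fake disk full in isolation, something like the following works (a hypothetical snippet, not part of the patch; the file name and sizes are made up). Because MockRAMOutputStream enforces the cap in flushBuffer, the IOException surfaces when the underlying 1 KB buffer is flushed during writeBytes or on close.

import java.io.IOException;

import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MockRAMDirectory;

// Hypothetical snippet, not part of the patch.
class FakeDiskFullDemo {
  public static void main(String[] args) throws IOException {
    MockRAMDirectory dir = new MockRAMDirectory();
    dir.setMaxSizeInBytes(2048);                 // pretend only 2 KB of disk is free

    IndexOutput out = dir.createOutput("_junk"); // actually a MockRAMOutputStream
    byte[] block = new byte[4096];               // more than the fake disk can hold
    try {
      out.writeBytes(block, block.length);       // the buffer flushes along the way...
      out.close();                               // ...or the remainder flushes here
    } catch (IOException e) {
      // message reads "fake disk full at NNN bytes"
      System.out.println("got expected exception: " + e.getMessage());
    }
    dir.close();
  }
}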