From d6823ef3746e7c7dba3aa64571d1ec04db620c0b Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 18 Dec 2006 16:45:29 +0000 Subject: [PATCH] LUCENE-702: make sure addIndexes(*) does not corrupt index on disk full; change to transactional semantics; fix IndexWriter.mergeSegments and IndexReader.commit to clean up (and leave instance consistent) on exception git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@488330 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 9 + .../apache/lucene/index/IndexFileDeleter.java | 87 ++- .../org/apache/lucene/index/IndexReader.java | 92 ++- .../org/apache/lucene/index/IndexWriter.java | 572 ++++++++++++++---- .../org/apache/lucene/index/MultiReader.java | 14 + .../org/apache/lucene/index/SegmentInfo.java | 28 + .../org/apache/lucene/index/SegmentInfos.java | 52 +- .../apache/lucene/index/SegmentReader.java | 40 ++ .../org/apache/lucene/store/RAMDirectory.java | 21 +- .../apache/lucene/store/RAMOutputStream.java | 2 +- .../lucene/index/TestIndexFileDeleter.java | 12 +- .../apache/lucene/index/TestIndexReader.java | 215 +++++++ .../apache/lucene/index/TestIndexWriter.java | 351 +++++++++++ .../lucene/index/store/TestRAMDirectory.java | 10 +- .../apache/lucene/store/MockRAMDirectory.java | 130 ++++ .../lucene/store/MockRAMOutputStream.java | 83 +++ 16 files changed, 1540 insertions(+), 178 deletions(-) create mode 100644 src/test/org/apache/lucene/store/MockRAMDirectory.java create mode 100644 src/test/org/apache/lucene/store/MockRAMOutputStream.java diff --git a/CHANGES.txt b/CHANGES.txt index cc88a64cc9a..68eeae8fded 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -240,6 +240,15 @@ Bug fixes "The handle is invalid" IOExceptions on Windows when trying to close readers or writers. (Michael Busch via Mike McCandless). +26. LUCENE-702: Fix IndexWriter.addIndexes(*) to not corrupt the index + on any exceptions (eg disk full). The semantics of these methods + is now transactional: either all indices are merged or none are. + Also fixed IndexWriter.mergeSegments (called outside of + addIndexes(*) by addDocument, optimize, flushRamSegments) and + IndexReader.commit() (called by close) to clean up and keep the + instance state consistent to what's actually in the index (Mike + McCandless). + Optimizations 1. 
LUCENE-586: TermDocs.skipTo() is now more efficient for diff --git a/src/java/org/apache/lucene/index/IndexFileDeleter.java b/src/java/org/apache/lucene/index/IndexFileDeleter.java index 43f144a419b..bb602132337 100644 --- a/src/java/org/apache/lucene/index/IndexFileDeleter.java +++ b/src/java/org/apache/lucene/index/IndexFileDeleter.java @@ -26,6 +26,8 @@ import java.io.IOException; import java.io.PrintStream; import java.util.Vector; import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; /** * A utility class (used by both IndexReader and @@ -35,7 +37,7 @@ import java.util.HashMap; */ public class IndexFileDeleter { private Vector deletable; - private Vector pending; + private HashSet pending; private Directory directory; private SegmentInfos segmentInfos; private PrintStream infoStream; @@ -45,6 +47,12 @@ public class IndexFileDeleter { this.segmentInfos = segmentInfos; this.directory = directory; } + void setSegmentInfos(SegmentInfos segmentInfos) { + this.segmentInfos = segmentInfos; + } + SegmentInfos getSegmentInfos() { + return segmentInfos; + } void setInfoStream(PrintStream infoStream) { this.infoStream = infoStream; @@ -134,6 +142,10 @@ public class IndexFileDeleter { // This is an orphan'd separate norms file: doDelete = true; } + } else if ("cfs".equals(extension) && !info.getUseCompoundFile()) { + // This is a partially written + // _segmentName.cfs: + doDelete = true; } } } @@ -167,6 +179,30 @@ public class IndexFileDeleter { deleteFiles(reader.files(), reader.directory()); // delete other files } } + + /** + * Delete these segments, as long as they are not listed + * in protectedSegments. If they are, then, instead, add + * them to the pending set. + */ + + public final void deleteSegments(Vector segments, HashSet protectedSegments) throws IOException { + + deleteFiles(); // try to delete files that we couldn't before + + for (int i = 0; i < segments.size(); i++) { + SegmentReader reader = (SegmentReader)segments.elementAt(i); + if (reader.directory() == this.directory) { + if (protectedSegments.contains(reader.getSegmentName())) { + addPendingFiles(reader.files()); // record these for deletion on commit + } else { + deleteFiles(reader.files()); // try to delete our files + } + } else { + deleteFiles(reader.files(), reader.directory()); // delete other files + } + } + } public final void deleteFiles(Vector files, Directory directory) throws IOException { @@ -199,22 +235,51 @@ public class IndexFileDeleter { pending = null; } - final void addPendingFile(String fileName) { - if (pending == null) { - pending = new Vector(); + /* + Record that the files for these segments should be + deleted, once the pending deletes are committed. + */ + final void addPendingSegments(Vector segments) throws IOException { + for (int i = 0; i < segments.size(); i++) { + SegmentReader reader = (SegmentReader)segments.elementAt(i); + if (reader.directory() == this.directory) { + addPendingFiles(reader.files()); + } } - pending.addElement(fileName); } - final void commitPendingFiles() { + /* + Record list of files for deletion, but do not delete + them until commitPendingFiles is called. + */ + final void addPendingFiles(Vector files) { + for(int i=0;iNote that this requires temporary free space in the + * Directory up to the size of the starting index (exact + * usage could be less but will depend on many + * factors).

+   *
+   * <p>If an Exception is hit during optimize() (eg, due to
+   * disk full), the index will not be corrupted. However
+   * it's possible that one of the segments in the index
+   * will be in non-CFS format even when using compound file
+   * format. This will occur when the Exception is hit
+   * during conversion of the segment into compound
+   * format.</p>
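+   *
+   * <p>(Illustrative sketch, not part of this patch: these
+   * semantics let a caller simply retry once space is freed;
+   * <code>freeUpSpace()</code> is a hypothetical application
+   * helper.)</p>
+   * <pre>
+   *   try {
+   *     writer.optimize();
+   *   } catch (IOException e) {
+   *     // index is still consistent; make room and retry:
+   *     freeUpSpace();
+   *     writer.optimize();
+   *   }
+   * </pre>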

+ */ public synchronized void optimize() throws IOException { flushRamSegments(); while (segmentInfos.size() > 1 || @@ -579,6 +607,85 @@ public class IndexWriter { } } + /* + * Begin a transaction. During a transaction, any segment + * merges that happen (or ram segments flushed) will not + * write a new segments file and will not remove any files + * that were present at the start of the transaction. You + * must make a matched (try/finally) call to + * commitTransaction() or rollbackTransaction() to finish + * the transaction. + */ + private void startTransaction() throws IOException { + if (inTransaction) { + throw new IOException("transaction is already in process"); + } + rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone(); + protectedSegments = new HashSet(); + for(int i=0;iThis may be used to parallelize batch indexing. A large document @@ -587,27 +694,68 @@ public class IndexWriter { * complete index can then be created by merging sub-collection indexes * with this method. * - *

<p>After this completes, the index is optimized. */
+   * <p>After this completes, the index is optimized.
+   *
+   * <p>This method is transactional in how Exceptions are
+   * handled: it does not commit a new segments_N file until
+   * all indexes are added. This means if an Exception
+   * occurs (eg disk full), then either no indexes will have
+   * been added or they all will have been.</p>
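+   *
+   * <p>(Illustrative sketch, not part of this patch: internally
+   * this is built on the transaction methods added above;
+   * simplified, error handling elided.)</p>
+   * <pre>
+   *   startTransaction();          // snapshot segmentInfos, protect current segments
+   *   boolean success = false;
+   *   try {
+   *     // ... read and merge the incoming segments ...
+   *     success = true;
+   *   } finally {
+   *     if (success)
+   *       commitTransaction();     // write the new segments_N file
+   *     else
+   *       rollbackTransaction();   // restore snapshot, remove partial files
+   *   }
+   * </pre>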

+ * + *

If an Exception is hit, it's still possible that all + * indexes were successfully added. This happens when the + * Exception is hit when trying to build a CFS file. In + * this case, one segment in the index will be in non-CFS + * format, even when using compound file format.

+ * + *

Also note that on an Exception, the index may still + * have been partially or fully optimized even though none + * of the input indexes were added.

+ * + *

Note that this requires temporary free space in the + * Directory up to 2X the sum of all input indexes + * (including the starting index). Exact usage could be + * less but will depend on many factors.

+ * + *

See LUCENE-702 + * for details.
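+   *
+   * <p>(Illustrative sketch, not part of this patch: a caller can
+   * verify the all-or-none behavior. Here <code>dir</code> is the
+   * writer's Directory, and <code>startCount</code> and
+   * <code>endCount</code> are the totals expected before and after
+   * a fully successful add.)</p>
+   * <pre>
+   *   try {
+   *     writer.addIndexes(dirs);
+   *   } catch (IOException e) {
+   *     IndexReader r = IndexReader.open(dir);
+   *     int count = r.numDocs();
+   *     r.close();
+   *     // no partial adds: either nothing or everything went in
+   *     assert count == startCount || count == endCount;
+   *   }
+   * </pre>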

+ */ public synchronized void addIndexes(Directory[] dirs) - throws IOException { + throws IOException { + optimize(); // start with zero or 1 seg int start = segmentInfos.size(); - for (int i = 0; i < dirs.length; i++) { - SegmentInfos sis = new SegmentInfos(); // read infos from dir - sis.read(dirs[i]); - for (int j = 0; j < sis.size(); j++) { - segmentInfos.addElement(sis.info(j)); // add each info - } - } + boolean success = false; - // merge newly added segments in log(n) passes - while (segmentInfos.size() > start+mergeFactor) { - for (int base = start; base < segmentInfos.size(); base++) { - int end = Math.min(segmentInfos.size(), base+mergeFactor); - if (end-base > 1) - mergeSegments(segmentInfos, base, end); + startTransaction(); + + try { + for (int i = 0; i < dirs.length; i++) { + SegmentInfos sis = new SegmentInfos(); // read infos from dir + sis.read(dirs[i]); + for (int j = 0; j < sis.size(); j++) { + segmentInfos.addElement(sis.info(j)); // add each info + } + } + + // merge newly added segments in log(n) passes + while (segmentInfos.size() > start+mergeFactor) { + for (int base = start; base < segmentInfos.size(); base++) { + int end = Math.min(segmentInfos.size(), base+mergeFactor); + if (end-base > 1) { + mergeSegments(segmentInfos, base, end); + } + } + } + success = true; + } finally { + if (success) { + commitTransaction(); + } else { + rollbackTransaction(); } } @@ -623,6 +771,11 @@ public class IndexWriter { *

* <p>This requires this index not be among those to be added, and the
   * upper bound* of those segment doc counts not exceed maxMergeDocs.
+   *
+   * <p>See {@link #addIndexes(Directory[])} for
+   * details on transactional semantics, temporary free
+   * space required in the Directory, and non-CFS segments
+   * on an Exception.</p>
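+   *
+   * <p>(Illustrative sketch, not part of this patch: both argument
+   * checks throw IllegalArgumentException and leave the index
+   * unchanged.)</p>
+   * <pre>
+   *   // writer was opened on indexDir:
+   *   writer.addIndexesNoOptimize(new Directory[] {indexDir});
+   *     // throws: "Cannot add this index to itself"
+   *   writer.addIndexesNoOptimize(dirsWithOversizedSegments);
+   *     // throws: "Upper bound cannot exceed maxMergeDocs"
+   * </pre>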

*/ public synchronized void addIndexesNoOptimize(Directory[] dirs) throws IOException { @@ -651,96 +804,114 @@ public class IndexWriter { // and target may use compound file or not. So we use mergeSegments() to // copy a segment, which may cause doc count to change because deleted // docs are garbage collected. - // - // In current addIndexes(Directory[]), segment infos in S are added to - // T's "segmentInfos" upfront. Then segments in S are merged to T several - // at a time. Every merge is committed with T's "segmentInfos". So if - // a reader is opened on T while addIndexes() is going on, it could see - // an inconsistent index. AddIndexesNoOptimize() has a similar behaviour. // 1 flush ram segments + flushRamSegments(); // 2 copy segment infos and find the highest level from dirs int start = segmentInfos.size(); int startUpperBound = minMergeDocs; + boolean success = false; + + startTransaction(); + try { - for (int i = 0; i < dirs.length; i++) { - if (directory == dirs[i]) { - // cannot add this index: segments may be deleted in merge before added - throw new IllegalArgumentException("Cannot add this index to itself"); - } - SegmentInfos sis = new SegmentInfos(); // read infos from dir - sis.read(dirs[i]); - for (int j = 0; j < sis.size(); j++) { - SegmentInfo info = sis.info(j); - segmentInfos.addElement(info); // add each info + try { + for (int i = 0; i < dirs.length; i++) { + if (directory == dirs[i]) { + // cannot add this index: segments may be deleted in merge before added + throw new IllegalArgumentException("Cannot add this index to itself"); + } - while (startUpperBound < info.docCount) { - startUpperBound *= mergeFactor; // find the highest level from dirs - if (startUpperBound > maxMergeDocs) { - // upper bound cannot exceed maxMergeDocs - throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs"); + SegmentInfos sis = new SegmentInfos(); // read infos from dir + sis.read(dirs[i]); + for (int j = 0; j < sis.size(); j++) { + SegmentInfo info = sis.info(j); + segmentInfos.addElement(info); // add each info + + while (startUpperBound < info.docCount) { + startUpperBound *= mergeFactor; // find the highest level from dirs + if (startUpperBound > maxMergeDocs) { + // upper bound cannot exceed maxMergeDocs + throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs"); + } } } } + } catch (IllegalArgumentException e) { + for (int i = segmentInfos.size() - 1; i >= start; i--) { + segmentInfos.remove(i); + } + throw e; } - } catch (IllegalArgumentException e) { - for (int i = segmentInfos.size() - 1; i >= start; i--) { - segmentInfos.remove(i); + + // 3 maybe merge segments starting from the highest level from dirs + maybeMergeSegments(startUpperBound); + + // get the tail segments whose levels <= h + int segmentCount = segmentInfos.size(); + int numTailSegments = 0; + while (numTailSegments < segmentCount + && startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) { + numTailSegments++; } - throw e; - } - - // 3 maybe merge segments starting from the highest level from dirs - maybeMergeSegments(startUpperBound); - - // get the tail segments whose levels <= h - int segmentCount = segmentInfos.size(); - int numTailSegments = 0; - while (numTailSegments < segmentCount - && startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) { - numTailSegments++; - } - if (numTailSegments == 0) { - return; - } - - // 4 make sure invariants hold for the tail segments whose levels <= h - if 
(checkNonDecreasingLevels(segmentCount - numTailSegments)) { - // identify the segments from S to be copied (not merged in 3) - int numSegmentsToCopy = 0; - while (numSegmentsToCopy < segmentCount - && directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) { - numSegmentsToCopy++; - } - if (numSegmentsToCopy == 0) { + if (numTailSegments == 0) { + success = true; return; } - // copy those segments from S - for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) { - mergeSegments(segmentInfos, i, i + 1); - } - if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) { - return; - } - } + // 4 make sure invariants hold for the tail segments whose levels <= h + if (checkNonDecreasingLevels(segmentCount - numTailSegments)) { + // identify the segments from S to be copied (not merged in 3) + int numSegmentsToCopy = 0; + while (numSegmentsToCopy < segmentCount + && directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) { + numSegmentsToCopy++; + } + if (numSegmentsToCopy == 0) { + success = true; + return; + } - // invariants do not hold, simply merge those segments - mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount); + // copy those segments from S + for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) { + mergeSegments(segmentInfos, i, i + 1); + } + if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) { + success = true; + return; + } + } - // maybe merge segments again if necessary - if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) { - maybeMergeSegments(startUpperBound * mergeFactor); + // invariants do not hold, simply merge those segments + mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount); + + // maybe merge segments again if necessary + if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) { + maybeMergeSegments(startUpperBound * mergeFactor); + } + + success = true; + } finally { + if (success) { + commitTransaction(); + } else { + rollbackTransaction(); + } } } /** Merges the provided indexes into this index. *

<p>After this completes, the index is optimized.</p>
    * <p>The provided IndexReaders are not closed.</p>
+
+   * <p>See {@link #addIndexes(Directory[])} for
+   * details on transactional semantics, temporary free
+   * space required in the Directory, and non-CFS segments
+   * on an Exception.</p>
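+   *
+   * <p>(Illustrative sketch, not part of this patch; <code>dirs</code>
+   * is an array of index Directories, and since this method does not
+   * close the readers, the caller does.)</p>
+   * <pre>
+   *   IndexReader[] readers = new IndexReader[dirs.length];
+   *   for (int i = 0; i < dirs.length; i++)
+   *     readers[i] = IndexReader.open(dirs[i]);
+   *   try {
+   *     writer.addIndexes(readers);
+   *   } finally {
+   *     for (int i = 0; i < readers.length; i++)
+   *       readers[i].close();
+   *   }
+   * </pre>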

*/ public synchronized void addIndexes(IndexReader[] readers) throws IOException { @@ -761,26 +932,61 @@ public class IndexWriter { for (int i = 0; i < readers.length; i++) // add new indexes merger.add(readers[i]); - int docCount = merger.merge(); // merge 'em - - segmentInfos.setSize(0); // pop old infos & add new - SegmentInfo info = new SegmentInfo(mergedName, docCount, directory, false); - segmentInfos.addElement(info); - - if(sReader != null) - sReader.close(); + SegmentInfo info; String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); - segmentInfos.write(directory); // commit changes + + boolean success = false; + + startTransaction(); + + try { + int docCount = merger.merge(); // merge 'em + + segmentInfos.setSize(0); // pop old infos & add new + info = new SegmentInfo(mergedName, docCount, directory, false); + segmentInfos.addElement(info); + commitPending = true; + + if(sReader != null) + sReader.close(); + + success = true; + + } finally { + if (!success) { + rollbackTransaction(); + } else { + commitTransaction(); + } + } deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file deleter.deleteSegments(segmentsToDelete); // delete now-unused segments if (useCompoundFile) { - Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); + success = false; + segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); - info.setUseCompoundFile(true); - segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file + Vector filesToDelete; + + startTransaction(); + + try { + + filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); + + info.setUseCompoundFile(true); + commitPending = true; + success = true; + + } finally { + if (!success) { + rollbackTransaction(); + } else { + commitTransaction(); + } + } deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file deleter.deleteFiles(filesToDelete); // delete now unused files of segment @@ -884,6 +1090,7 @@ public class IndexWriter { // mergeFactor and/or maxBufferedDocs change(s) while (numSegments >= mergeFactor) { // merge the leftmost* mergeFactor segments + int docCount = mergeSegments(segmentInfos, minSegment, minSegment + mergeFactor); numSegments -= mergeFactor; @@ -921,51 +1128,154 @@ public class IndexWriter { SegmentMerger merger = new SegmentMerger(this, mergedName); final Vector segmentsToDelete = new Vector(); - for (int i = minSegment; i < end; i++) { - SegmentInfo si = sourceSegments.info(i); - if (infoStream != null) - infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); - IndexReader reader = SegmentReader.get(si); - merger.add(reader); - if ((reader.directory() == this.directory) || // if we own the directory - (reader.directory() == this.ramDirectory)) - segmentsToDelete.addElement(reader); // queue segment for deletion - } - - int mergedDocCount = merger.merge(); - - if (infoStream != null) { - infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)"); - } - - SegmentInfo newSegment = new SegmentInfo(mergedName, mergedDocCount, - directory, false); - if (sourceSegments == ramSegmentInfos) { - sourceSegments.removeAllElements(); - segmentInfos.addElement(newSegment); - } else { - for (int i = end-1; i > minSegment; i--) // remove old infos & add new - sourceSegments.remove(i); - segmentInfos.set(minSegment, newSegment); - } - - // close readers before we attempt to delete now-obsolete segments - merger.closeReaders(); String segmentsInfosFileName = 
segmentInfos.getCurrentSegmentFileName(); - segmentInfos.write(directory); // commit before deleting + String nextSegmentsFileName = segmentInfos.getNextSegmentFileName(); - deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file - deleter.deleteSegments(segmentsToDelete); // delete now-unused segments + SegmentInfo newSegment = null; + + int mergedDocCount; + + // This is try/finally to make sure merger's readers are closed: + try { + + for (int i = minSegment; i < end; i++) { + SegmentInfo si = sourceSegments.info(i); + if (infoStream != null) + infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); + IndexReader reader = SegmentReader.get(si); + merger.add(reader); + if ((reader.directory() == this.directory) || // if we own the directory + (reader.directory() == this.ramDirectory)) + segmentsToDelete.addElement(reader); // queue segment for deletion + } + + SegmentInfos rollback = null; + boolean success = false; + + // This is try/finally to rollback our internal state + // if we hit exception when doing the merge: + try { + + mergedDocCount = merger.merge(); + + if (infoStream != null) { + infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)"); + } + + newSegment = new SegmentInfo(mergedName, mergedDocCount, + directory, false); + + + if (sourceSegments == ramSegmentInfos) { + segmentInfos.addElement(newSegment); + } else { + + if (!inTransaction) { + // Now save the SegmentInfo instances that + // we are replacing: + rollback = (SegmentInfos) segmentInfos.clone(); + } + + for (int i = end-1; i > minSegment; i--) // remove old infos & add new + sourceSegments.remove(i); + + segmentInfos.set(minSegment, newSegment); + } + + if (!inTransaction) { + segmentInfos.write(directory); // commit before deleting + } else { + commitPending = true; + } + + success = true; + + } finally { + + if (success) { + // The non-ram-segments case is already committed + // (above), so all the remains for ram segments case + // is to clear the ram segments: + if (sourceSegments == ramSegmentInfos) { + ramSegmentInfos.removeAllElements(); + } + } else if (!inTransaction) { + + // Must rollback so our state matches index: + + if (sourceSegments == ramSegmentInfos) { + // Simple case: newSegment may or may not have + // been added to the end of our segment infos, + // so just check & remove if so: + if (newSegment != null && + segmentInfos.size() > 0 && + segmentInfos.info(segmentInfos.size()-1) == newSegment) { + segmentInfos.remove(segmentInfos.size()-1); + } + } else if (rollback != null) { + // Rollback the individual SegmentInfo + // instances, but keep original SegmentInfos + // instance (so we don't try to write again the + // same segments_N file -- write once): + segmentInfos.clear(); + segmentInfos.addAll(rollback); + } + + // Delete any partially created files: + deleter.deleteFile(nextSegmentsFileName); + deleter.findDeletableFiles(); + deleter.deleteFiles(); + } + } + } finally { + // close readers before we attempt to delete now-obsolete segments + merger.closeReaders(); + } + + if (!inTransaction) { + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + deleter.deleteSegments(segmentsToDelete); // delete now-unused segments + } else { + deleter.addPendingFile(segmentsInfosFileName); // delete old segments_N file + deleter.deleteSegments(segmentsToDelete, protectedSegments); // delete now-unused segments + } if (useCompoundFile) { - Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); - segmentsInfosFileName = 
segmentInfos.getCurrentSegmentFileName(); - newSegment.setUseCompoundFile(true); - segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file + segmentsInfosFileName = nextSegmentsFileName; + nextSegmentsFileName = segmentInfos.getNextSegmentFileName(); - deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + Vector filesToDelete; + + boolean success = false; + + try { + + filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); + newSegment.setUseCompoundFile(true); + if (!inTransaction) { + segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file + } + success = true; + + } finally { + if (!success && !inTransaction) { + // Must rollback: + newSegment.setUseCompoundFile(false); + deleter.deleteFile(mergedName + ".cfs"); + deleter.deleteFile(nextSegmentsFileName); + } + } + + if (!inTransaction) { + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + } + + // We can delete these segments whether or not we are + // in a transaction because we had just written them + // above so they can't need protection by the + // transaction: deleter.deleteFiles(filesToDelete); // delete now-unused segments } diff --git a/src/java/org/apache/lucene/index/MultiReader.java b/src/java/org/apache/lucene/index/MultiReader.java index 7c53cbe9556..5a9199efe59 100644 --- a/src/java/org/apache/lucene/index/MultiReader.java +++ b/src/java/org/apache/lucene/index/MultiReader.java @@ -230,6 +230,20 @@ public class MultiReader extends IndexReader { subReaders[i].commit(); } + void startCommit() { + super.startCommit(); + for (int i = 0; i < subReaders.length; i++) { + subReaders[i].startCommit(); + } + } + + void rollbackCommit() { + super.rollbackCommit(); + for (int i = 0; i < subReaders.length; i++) { + subReaders[i].rollbackCommit(); + } + } + protected synchronized void doClose() throws IOException { for (int i = 0; i < subReaders.length; i++) subReaders[i].close(); diff --git a/src/java/org/apache/lucene/index/SegmentInfo.java b/src/java/org/apache/lucene/index/SegmentInfo.java index 9bffc1ebd01..e1860c0eb63 100644 --- a/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/src/java/org/apache/lucene/index/SegmentInfo.java @@ -62,6 +62,23 @@ final class SegmentInfo { preLockless = false; } + /** + * Copy everything from src SegmentInfo into our instance. 
+ */ + void reset(SegmentInfo src) { + name = src.name; + docCount = src.docCount; + dir = src.dir; + preLockless = src.preLockless; + delGen = src.delGen; + if (src.normGen == null) { + normGen = null; + } else { + normGen = new long[src.normGen.length]; + System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length); + } + isCompoundFile = src.isCompoundFile; + } /** * Construct a new SegmentInfo instance by reading a @@ -151,6 +168,17 @@ final class SegmentInfo { delGen = -1; } + public Object clone () { + SegmentInfo si = new SegmentInfo(name, docCount, dir); + si.isCompoundFile = isCompoundFile; + si.delGen = delGen; + si.preLockless = preLockless; + if (normGen != null) { + si.normGen = (long[]) normGen.clone(); + } + return si; + } + String getDelFileName() { if (delGen == -1) { // In this case we know there is no deletion filename diff --git a/src/java/org/apache/lucene/index/SegmentInfos.java b/src/java/org/apache/lucene/index/SegmentInfos.java index e9d3e5162f4..bebb83df9af 100644 --- a/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/src/java/org/apache/lucene/index/SegmentInfos.java @@ -50,7 +50,11 @@ public final class SegmentInfos extends Vector { * starting with the current time in milliseconds forces to create unique version numbers. */ private long version = System.currentTimeMillis(); - private long generation = 0; // generation of the "segments_N" file we read + + private long generation = 0; // generation of the "segments_N" for the next commit + private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read + // or wrote; this is normally the same as generation except if + // there was an IOException that had interrupted a commit /** * If non-null, information about loading segments_N files @@ -132,12 +136,28 @@ public final class SegmentInfos extends Vector { } /** - * Get the segment_N filename in use by this segment infos. + * Get the segments_N filename in use by this segment infos. */ public String getCurrentSegmentFileName() { return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - generation); + lastGeneration); + } + + /** + * Get the next segments_N filename that will be written. 
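+   *
+   * <p>(Illustrative example, not part of this patch: generations
+   * are encoded in radix <code>Character.MAX_RADIX</code> (36), as
+   * in the read path above. So if <code>segments_4</code> was just
+   * read or written, this returns <code>segments_5</code>, and
+   * generation 11 is written as <code>segments_b</code>. On a
+   * brand-new index (generation -1) the first file written is
+   * <code>segments_1</code>.)</p>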
+ */ + public String getNextSegmentFileName() { + long nextGeneration; + + if (generation == -1) { + nextGeneration = 1; + } else { + nextGeneration = generation+1; + } + return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + nextGeneration); } /** @@ -158,6 +178,7 @@ public final class SegmentInfos extends Vector { generation = Long.parseLong(segmentFileName.substring(1+IndexFileNames.SEGMENTS.length()), Character.MAX_RADIX); } + lastGeneration = generation; try { int format = input.readInt(); @@ -199,7 +220,7 @@ public final class SegmentInfos extends Vector { */ public final void read(Directory directory) throws IOException { - generation = -1; + generation = lastGeneration = -1; new FindSegmentsFile(directory) { @@ -212,6 +233,8 @@ public final class SegmentInfos extends Vector { public final void write(Directory directory) throws IOException { + String segmentFileName = getNextSegmentFileName(); + // Always advance the generation on write: if (generation == -1) { generation = 1; @@ -219,7 +242,6 @@ public final class SegmentInfos extends Vector { generation++; } - String segmentFileName = getCurrentSegmentFileName(); IndexOutput output = directory.createOutput(segmentFileName); try { @@ -229,8 +251,7 @@ public final class SegmentInfos extends Vector { output.writeInt(counter); // write counter output.writeInt(size()); // write infos for (int i = 0; i < size(); i++) { - SegmentInfo si = info(i); - si.write(output); + info(i).write(output); } } finally { @@ -247,6 +268,21 @@ public final class SegmentInfos extends Vector { // It's OK if we fail to write this file since it's // used only as one of the retry fallbacks. } + + lastGeneration = generation; + } + + /** + * Returns a copy of this instance, also copying each + * SegmentInfo. + */ + + public Object clone() { + SegmentInfos sis = (SegmentInfos) super.clone(); + for(int i=0;i= 2.0) { + rate /= 2; + } + if (diskRatio >= 4.0) { + rate /= 2; + } + if (diskRatio >= 6.0) { + rate = 0.0; + } + if (debug) { + System.out.println("\ncycle: " + diskFree + " bytes"); + } + testName = "disk full during reader.close() @ " + thisDiskFree + " bytes"; + } else { + thisDiskFree = 0; + rate = 0.0; + if (debug) { + System.out.println("\ncycle: same writer: unlimited disk space"); + } + testName = "reader re-use after disk full"; + } + + dir.setMaxSizeInBytes(thisDiskFree); + dir.setRandomIOExceptionRate(rate, diskFree); + + try { + if (0 == x) { + int docId = 12; + for(int i=0;i<13;i++) { + reader.deleteDocument(docId); + reader.setNorm(docId, "contents", (float) 2.0); + docId += 12; + } + } + reader.close(); + success = true; + if (0 == x) { + done = true; + } + } catch (IOException e) { + if (debug) { + System.out.println(" hit IOException: " + e); + } + err = e; + if (1 == x) { + e.printStackTrace(); + fail(testName + " hit IOException after disk space was freed up"); + } + } + + // Whether we succeeded or failed, check that all + // un-referenced files were in fact deleted (ie, + // we did not create garbage). 
Just create a + // new IndexFileDeleter, have it delete + // unreferenced files, then verify that in fact + // no files were deleted: + String[] startFiles = dir.list(); + SegmentInfos infos = new SegmentInfos(); + infos.read(dir); + IndexFileDeleter d = new IndexFileDeleter(infos, dir); + d.findDeletableFiles(); + d.deleteFiles(); + String[] endFiles = dir.list(); + + Arrays.sort(startFiles); + Arrays.sort(endFiles); + + //for(int i=0;i 0) { + s += "\n "; + } + s += l[i]; + } + return s; + } + private void deleteReaderReaderConflict(boolean optimize) throws IOException { Directory dir = getDirectory(true); @@ -697,4 +910,6 @@ public class TestIndexReader extends TestCase } dir.delete(); } + + } diff --git a/src/test/org/apache/lucene/index/TestIndexWriter.java b/src/test/org/apache/lucene/index/TestIndexWriter.java index 6ee6c14804f..d94f63d8556 100644 --- a/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -2,6 +2,7 @@ package org.apache.lucene.index; import java.io.IOException; import java.io.File; +import java.util.Arrays; import junit.framework.TestCase; @@ -10,12 +11,16 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Hits; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.MockRAMDirectory; /** * @author goller @@ -88,6 +93,350 @@ public class TestIndexWriter extends TestCase writer.addDocument(doc); } + private void addDocWithIndex(IndexWriter writer, int index) throws IOException + { + Document doc = new Document(); + doc.add(new Field("content", "aaa " + index, Field.Store.YES, Field.Index.TOKENIZED)); + doc.add(new Field("id", "" + index, Field.Store.YES, Field.Index.TOKENIZED)); + writer.addDocument(doc); + } + + /* + Test: make sure when we run out of disk space or hit + random IOExceptions in any of the addIndexes(*) calls + that 1) index is not corrupt (searcher can open/search + it) and 2) transactional semantics are followed: + either all or none of the incoming documents were in + fact added. 
+ */ + public void testAddIndexOnDiskFull() throws IOException + { + + int START_COUNT = 57; + int NUM_DIR = 50; + int END_COUNT = START_COUNT + NUM_DIR*25; + + boolean debug = false; + + // Build up a bunch of dirs that have indexes which we + // will then merge together by calling addIndexes(*): + Directory[] dirs = new Directory[NUM_DIR]; + long inputDiskUsage = 0; + for(int i=0;i= 2.0) { + rate /= 2; + } + if (diskRatio >= 4.0) { + rate /= 2; + } + if (diskRatio >= 6.0) { + rate = 0.0; + } + if (debug) { + System.out.println("\ncycle: " + methodName + ": " + diskFree + " bytes"); + } + } else { + thisDiskFree = 0; + rate = 0.0; + if (debug) { + System.out.println("\ncycle: " + methodName + ", same writer: unlimited disk space"); + } + } + + dir.setMaxSizeInBytes(thisDiskFree); + dir.setRandomIOExceptionRate(rate, diskFree); + + try { + + if (0 == method) { + writer.addIndexes(dirs); + } else if (1 == method) { + IndexReader readers[] = new IndexReader[dirs.length]; + for(int i=0;i 0) { + s += "\n "; + } + s += l[i]; + } + return s; + } + // Make sure we can open an index for create even when a // reader holds it open (this fails pre lock-less // commits on windows): @@ -276,3 +625,5 @@ public class TestIndexWriter extends TestCase dir.delete(); } } + + diff --git a/src/test/org/apache/lucene/index/store/TestRAMDirectory.java b/src/test/org/apache/lucene/index/store/TestRAMDirectory.java index 967cfefef55..3e692a12128 100644 --- a/src/test/org/apache/lucene/index/store/TestRAMDirectory.java +++ b/src/test/org/apache/lucene/index/store/TestRAMDirectory.java @@ -33,6 +33,8 @@ import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.English; +import org.apache.lucene.store.MockRAMDirectory; + /** * JUnit testcase to test RAMDirectory. RAMDirectory itself is used in many testcases, * but not one of them uses an different constructor other than the default constructor. 
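The hunks below switch TestRAMDirectory over to MockRAMDirectory, so the same tests can also bound disk usage. A minimal sketch of the disk-full pattern the new tests rely on (illustrative only; it uses the MockRAMDirectory methods added later in this patch):

    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    dir.resetMaxUsedSizeInBytes();            // start tracking peak usage
    // ... run the operation under test (addDocument, optimize, ...) ...
    long needed = dir.getMaxUsedSizeInBytes();
    // a second run with a cap just below the peak forces a fake
    // "disk full" IOException from MockRAMOutputStream:
    dir.setMaxSizeInBytes(needed - 1);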
@@ -70,7 +72,7 @@ public class TestRAMDirectory extends TestCase { public void testRAMDirectory () throws IOException { Directory dir = FSDirectory.getDirectory(indexDir, false); - RAMDirectory ramDir = new RAMDirectory(dir); + MockRAMDirectory ramDir = new MockRAMDirectory(dir); // close the underlying directory dir.close(); @@ -98,7 +100,7 @@ public class TestRAMDirectory extends TestCase { public void testRAMDirectoryFile () throws IOException { - RAMDirectory ramDir = new RAMDirectory(indexDir); + MockRAMDirectory ramDir = new MockRAMDirectory(indexDir); // Check size assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes()); @@ -123,7 +125,7 @@ public class TestRAMDirectory extends TestCase { public void testRAMDirectoryString () throws IOException { - RAMDirectory ramDir = new RAMDirectory(indexDir.getCanonicalPath()); + MockRAMDirectory ramDir = new MockRAMDirectory(indexDir.getCanonicalPath()); // Check size assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes()); @@ -151,7 +153,7 @@ public class TestRAMDirectory extends TestCase { public void testRAMDirectorySize() throws IOException, InterruptedException { - final RAMDirectory ramDir = new RAMDirectory(indexDir.getCanonicalPath()); + final MockRAMDirectory ramDir = new MockRAMDirectory(indexDir.getCanonicalPath()); final IndexWriter writer = new IndexWriter(ramDir, new WhitespaceAnalyzer(), false); writer.optimize();
diff --git a/src/test/org/apache/lucene/store/MockRAMDirectory.java b/src/test/org/apache/lucene/store/MockRAMDirectory.java new file mode 100644 index 00000000000..0333526d568 --- /dev/null +++ b/src/test/org/apache/lucene/store/MockRAMDirectory.java @@ -0,0 +1,130 @@ +package org.apache.lucene.store; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.File; +import java.util.Iterator; +import java.util.Random; + +/** + * This is a subclass of RAMDirectory that adds methods + * intended to be used only by unit tests. + * @version $Id: RAMDirectory.java 437897 2006-08-29 01:13:10Z yonik $ + */ + +public class MockRAMDirectory extends RAMDirectory { + long maxSize; + + // Max actual bytes used. This is set by MockRAMOutputStream: + long maxUsedSize; + double randomIOExceptionRate; + Random randomState; + + public MockRAMDirectory() throws IOException { + super(); + } + public MockRAMDirectory(String dir) throws IOException { + super(dir); + } + public MockRAMDirectory(Directory dir) throws IOException { + super(dir); + } + public MockRAMDirectory(File dir) throws IOException { + super(dir); + } + + public void setMaxSizeInBytes(long maxSize) { + this.maxSize = maxSize; + } + public long getMaxSizeInBytes() { + return this.maxSize; + } + + /** + * Returns the peak actual storage used (bytes) in this + * directory. + */ + public long getMaxUsedSizeInBytes() { + return this.maxUsedSize; + } + public void resetMaxUsedSizeInBytes() { + this.maxUsedSize = getRecomputedActualSizeInBytes(); + } + + /** + * If 0.0, no exceptions will be thrown. Else this should + * be a double 0.0 - 1.0. We will randomly throw an + * IOException on the first write to an OutputStream based + * on this probability. + */ + public void setRandomIOExceptionRate(double rate, long seed) { + randomIOExceptionRate = rate; + // seed so we have deterministic behaviour: + randomState = new Random(seed); + } + public double getRandomIOExceptionRate() { + return randomIOExceptionRate; + } + + void maybeThrowIOException() throws IOException { + if (randomIOExceptionRate > 0.0) { + int number = Math.abs(randomState.nextInt() % 1000); + if (number < randomIOExceptionRate*1000) { + throw new IOException("a random IOException"); + } + } + } + + public IndexOutput createOutput(String name) { + RAMFile file = new RAMFile(this); + synchronized (this) { + RAMFile existing = (RAMFile)fileMap.get(name); + if (existing!=null) { + sizeInBytes -= existing.sizeInBytes; + existing.directory = null; + } + fileMap.put(name, file); + } + + return new MockRAMOutputStream(this, file); + } + + /** Provided for testing purposes. Use sizeInBytes() instead. */ + public synchronized final long getRecomputedSizeInBytes() { + long size = 0; + Iterator it = files.iterator(); + while (it.hasNext()) + size += ((RAMFile) it.next()).getSizeInBytes(); + return size; + } + + /** Like getRecomputedSizeInBytes(), but, uses actual file + * lengths rather than buffer allocations (which are + * quantized up to the nearest + * BufferedIndexOutput.BUFFER_SIZE (now 1024) bytes). + */ + + final long getRecomputedActualSizeInBytes() { + long size = 0; + Iterator it = files.iterator(); + while (it.hasNext()) + size += ((RAMFile) it.next()).length; + return size; + } +}
diff --git a/src/test/org/apache/lucene/store/MockRAMOutputStream.java b/src/test/org/apache/lucene/store/MockRAMOutputStream.java new file mode 100644 index 00000000000..45425e822d4 --- /dev/null +++ b/src/test/org/apache/lucene/store/MockRAMOutputStream.java @@ -0,0 +1,83 @@ +package org.apache.lucene.store; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Iterator; + +/** + * Used by MockRAMDirectory to create an output stream that + * will throw an IOException on fake disk full, track max + * disk space actually used, and maybe throw random + * IOExceptions. + */ + +public class MockRAMOutputStream extends RAMOutputStream { + private MockRAMDirectory dir; + private boolean first=true; + + /** Construct an empty output buffer. */ + public MockRAMOutputStream(MockRAMDirectory dir, RAMFile f) { + super(f); + this.dir = dir; + } + + public void close() throws IOException { + super.close(); + + // Now compute actual disk usage & track the maxUsedSize + // in the MockRAMDirectory: + long size = dir.getRecomputedActualSizeInBytes(); + if (size > dir.maxUsedSize) { + dir.maxUsedSize = size; + } + } + + public void flushBuffer(byte[] src, int len) throws IOException { + long freeSpace = dir.maxSize - dir.sizeInBytes(); + long realUsage = 0; + + // Enforce disk full: + if (dir.maxSize != 0 && freeSpace <= len) { + // Compute the real disk free. This will greatly slow + // down our test but makes it more accurate: + realUsage = dir.getRecomputedActualSizeInBytes(); + freeSpace = dir.maxSize - realUsage; + } + + if (dir.maxSize != 0 && freeSpace <= len) { + if (freeSpace > 0 && freeSpace < len) { + realUsage += freeSpace; + super.flushBuffer(src, (int) freeSpace); + } + if (realUsage > dir.maxUsedSize) { + dir.maxUsedSize = realUsage; + } + throw new IOException("fake disk full at " + dir.sizeInBytes() + " bytes"); + } else { + super.flushBuffer(src, len); + } + + if (first) { + // Maybe throw random exception; only do this on first + // write to a new file: + first = false; + dir.maybeThrowIOException(); + } + } +}
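Taken together, MockRAMDirectory and MockRAMOutputStream let a test inject a disk-full cap plus seeded, deterministic random IOExceptions. A minimal sketch of one test cycle in the style of testAddIndexOnDiskFull above (writer, dirs and diskFree stand for state that test sets up):

    MockRAMDirectory dir = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    dir.setMaxSizeInBytes(diskFree);               // fake disk-full threshold
    dir.setRandomIOExceptionRate(0.05, diskFree);  // seeded for reproducibility
    try {
      writer.addIndexes(dirs);
    } catch (IOException e) {
      // expected on some cycles; transactional semantics guarantee
      // the index is still consistent
    }
    IndexReader reader = IndexReader.open(dir);    // must open cleanly
    reader.close();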