From d6823ef3746e7c7dba3aa64571d1ec04db620c0b Mon Sep 17 00:00:00 2001
From: Michael McCandless
If an Exception is hit during optimize() (eg, due to + * disk full), the index will not be corrupted. However + * it's possible that one of the segments in the index + * will be in non-CFS format even when using compound file + * format. This will occur when the Exception is hit + * during conversion of the segment into compound + * format.
+ */ public synchronized void optimize() throws IOException { flushRamSegments(); while (segmentInfos.size() > 1 || @@ -579,6 +607,85 @@ public class IndexWriter { } } + /* + * Begin a transaction. During a transaction, any segment + * merges that happen (or ram segments flushed) will not + * write a new segments file and will not remove any files + * that were present at the start of the transaction. You + * must make a matched (try/finally) call to + * commitTransaction() or rollbackTransaction() to finish + * the transaction. + */ + private void startTransaction() throws IOException { + if (inTransaction) { + throw new IOException("transaction is already in process"); + } + rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone(); + protectedSegments = new HashSet(); + for(int i=0;iAfter this completes, the index is optimized. */ + *
After this completes, the index is optimized. + * + *
This method is transactional in how Exceptions are + * handled: it does not commit a new segments_N file until + * all indexes are added. This means if an Exception + * occurs (eg disk full), then either no indexes will have + * been added or they all will have been.
+ * + *If an Exception is hit, it's still possible that all + * indexes were successfully added. This happens when the + * Exception is hit when trying to build a CFS file. In + * this case, one segment in the index will be in non-CFS + * format, even when using compound file format.
+ * + *Also note that on an Exception, the index may still + * have been partially or fully optimized even though none + * of the input indexes were added.
+ * + *Note that this requires temporary free space in the + * Directory up to 2X the sum of all input indexes + * (including the starting index). Exact usage could be + * less but will depend on many factors.
+ * + *See LUCENE-702 + * for details.
+ */ public synchronized void addIndexes(Directory[] dirs) - throws IOException { + throws IOException { + optimize(); // start with zero or 1 seg int start = segmentInfos.size(); - for (int i = 0; i < dirs.length; i++) { - SegmentInfos sis = new SegmentInfos(); // read infos from dir - sis.read(dirs[i]); - for (int j = 0; j < sis.size(); j++) { - segmentInfos.addElement(sis.info(j)); // add each info - } - } + boolean success = false; - // merge newly added segments in log(n) passes - while (segmentInfos.size() > start+mergeFactor) { - for (int base = start; base < segmentInfos.size(); base++) { - int end = Math.min(segmentInfos.size(), base+mergeFactor); - if (end-base > 1) - mergeSegments(segmentInfos, base, end); + startTransaction(); + + try { + for (int i = 0; i < dirs.length; i++) { + SegmentInfos sis = new SegmentInfos(); // read infos from dir + sis.read(dirs[i]); + for (int j = 0; j < sis.size(); j++) { + segmentInfos.addElement(sis.info(j)); // add each info + } + } + + // merge newly added segments in log(n) passes + while (segmentInfos.size() > start+mergeFactor) { + for (int base = start; base < segmentInfos.size(); base++) { + int end = Math.min(segmentInfos.size(), base+mergeFactor); + if (end-base > 1) { + mergeSegments(segmentInfos, base, end); + } + } + } + success = true; + } finally { + if (success) { + commitTransaction(); + } else { + rollbackTransaction(); } } @@ -623,6 +771,11 @@ public class IndexWriter { ** This requires this index not be among those to be added, and the * upper bound* of those segment doc counts not exceed maxMergeDocs. + * + *
See {@link #addIndexes(Directory[])} for + * details on transactional semantics, temporary free + * space required in the Directory, and non-CFS segments + * on an Exception.
*/ public synchronized void addIndexesNoOptimize(Directory[] dirs) throws IOException { @@ -651,96 +804,114 @@ public class IndexWriter { // and target may use compound file or not. So we use mergeSegments() to // copy a segment, which may cause doc count to change because deleted // docs are garbage collected. - // - // In current addIndexes(Directory[]), segment infos in S are added to - // T's "segmentInfos" upfront. Then segments in S are merged to T several - // at a time. Every merge is committed with T's "segmentInfos". So if - // a reader is opened on T while addIndexes() is going on, it could see - // an inconsistent index. AddIndexesNoOptimize() has a similar behaviour. // 1 flush ram segments + flushRamSegments(); // 2 copy segment infos and find the highest level from dirs int start = segmentInfos.size(); int startUpperBound = minMergeDocs; + boolean success = false; + + startTransaction(); + try { - for (int i = 0; i < dirs.length; i++) { - if (directory == dirs[i]) { - // cannot add this index: segments may be deleted in merge before added - throw new IllegalArgumentException("Cannot add this index to itself"); - } - SegmentInfos sis = new SegmentInfos(); // read infos from dir - sis.read(dirs[i]); - for (int j = 0; j < sis.size(); j++) { - SegmentInfo info = sis.info(j); - segmentInfos.addElement(info); // add each info + try { + for (int i = 0; i < dirs.length; i++) { + if (directory == dirs[i]) { + // cannot add this index: segments may be deleted in merge before added + throw new IllegalArgumentException("Cannot add this index to itself"); + } - while (startUpperBound < info.docCount) { - startUpperBound *= mergeFactor; // find the highest level from dirs - if (startUpperBound > maxMergeDocs) { - // upper bound cannot exceed maxMergeDocs - throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs"); + SegmentInfos sis = new SegmentInfos(); // read infos from dir + sis.read(dirs[i]); + for (int j = 0; j < sis.size(); j++) { + 
SegmentInfo info = sis.info(j); + segmentInfos.addElement(info); // add each info + + while (startUpperBound < info.docCount) { + startUpperBound *= mergeFactor; // find the highest level from dirs + if (startUpperBound > maxMergeDocs) { + // upper bound cannot exceed maxMergeDocs + throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs"); + } } } } + } catch (IllegalArgumentException e) { + for (int i = segmentInfos.size() - 1; i >= start; i--) { + segmentInfos.remove(i); + } + throw e; } - } catch (IllegalArgumentException e) { - for (int i = segmentInfos.size() - 1; i >= start; i--) { - segmentInfos.remove(i); + + // 3 maybe merge segments starting from the highest level from dirs + maybeMergeSegments(startUpperBound); + + // get the tail segments whose levels <= h + int segmentCount = segmentInfos.size(); + int numTailSegments = 0; + while (numTailSegments < segmentCount + && startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) { + numTailSegments++; } - throw e; - } - - // 3 maybe merge segments starting from the highest level from dirs - maybeMergeSegments(startUpperBound); - - // get the tail segments whose levels <= h - int segmentCount = segmentInfos.size(); - int numTailSegments = 0; - while (numTailSegments < segmentCount - && startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) { - numTailSegments++; - } - if (numTailSegments == 0) { - return; - } - - // 4 make sure invariants hold for the tail segments whose levels <= h - if (checkNonDecreasingLevels(segmentCount - numTailSegments)) { - // identify the segments from S to be copied (not merged in 3) - int numSegmentsToCopy = 0; - while (numSegmentsToCopy < segmentCount - && directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) { - numSegmentsToCopy++; - } - if (numSegmentsToCopy == 0) { + if (numTailSegments == 0) { + success = true; return; } - // copy those segments from S - for (int i = segmentCount - 
numSegmentsToCopy; i < segmentCount; i++) { - mergeSegments(segmentInfos, i, i + 1); - } - if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) { - return; - } - } + // 4 make sure invariants hold for the tail segments whose levels <= h + if (checkNonDecreasingLevels(segmentCount - numTailSegments)) { + // identify the segments from S to be copied (not merged in 3) + int numSegmentsToCopy = 0; + while (numSegmentsToCopy < segmentCount + && directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) { + numSegmentsToCopy++; + } + if (numSegmentsToCopy == 0) { + success = true; + return; + } - // invariants do not hold, simply merge those segments - mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount); + // copy those segments from S + for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) { + mergeSegments(segmentInfos, i, i + 1); + } + if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) { + success = true; + return; + } + } - // maybe merge segments again if necessary - if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) { - maybeMergeSegments(startUpperBound * mergeFactor); + // invariants do not hold, simply merge those segments + mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount); + + // maybe merge segments again if necessary + if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) { + maybeMergeSegments(startUpperBound * mergeFactor); + } + + success = true; + } finally { + if (success) { + commitTransaction(); + } else { + rollbackTransaction(); + } } } /** Merges the provided indexes into this index. *After this completes, the index is optimized.
*The provided IndexReaders are not closed.
+ + *See {@link #addIndexes(Directory[])} for + * details on transactional semantics, temporary free + * space required in the Directory, and non-CFS segments + * on an Exception.
*/ public synchronized void addIndexes(IndexReader[] readers) throws IOException { @@ -761,26 +932,61 @@ public class IndexWriter { for (int i = 0; i < readers.length; i++) // add new indexes merger.add(readers[i]); - int docCount = merger.merge(); // merge 'em - - segmentInfos.setSize(0); // pop old infos & add new - SegmentInfo info = new SegmentInfo(mergedName, docCount, directory, false); - segmentInfos.addElement(info); - - if(sReader != null) - sReader.close(); + SegmentInfo info; String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); - segmentInfos.write(directory); // commit changes + + boolean success = false; + + startTransaction(); + + try { + int docCount = merger.merge(); // merge 'em + + segmentInfos.setSize(0); // pop old infos & add new + info = new SegmentInfo(mergedName, docCount, directory, false); + segmentInfos.addElement(info); + commitPending = true; + + if(sReader != null) + sReader.close(); + + success = true; + + } finally { + if (!success) { + rollbackTransaction(); + } else { + commitTransaction(); + } + } deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file deleter.deleteSegments(segmentsToDelete); // delete now-unused segments if (useCompoundFile) { - Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); + success = false; + segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); - info.setUseCompoundFile(true); - segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file + Vector filesToDelete; + + startTransaction(); + + try { + + filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); + + info.setUseCompoundFile(true); + commitPending = true; + success = true; + + } finally { + if (!success) { + rollbackTransaction(); + } else { + commitTransaction(); + } + } deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file deleter.deleteFiles(filesToDelete); // delete now unused files of segment @@ 
-884,6 +1090,7 @@ public class IndexWriter { // mergeFactor and/or maxBufferedDocs change(s) while (numSegments >= mergeFactor) { // merge the leftmost* mergeFactor segments + int docCount = mergeSegments(segmentInfos, minSegment, minSegment + mergeFactor); numSegments -= mergeFactor; @@ -921,51 +1128,154 @@ public class IndexWriter { SegmentMerger merger = new SegmentMerger(this, mergedName); final Vector segmentsToDelete = new Vector(); - for (int i = minSegment; i < end; i++) { - SegmentInfo si = sourceSegments.info(i); - if (infoStream != null) - infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); - IndexReader reader = SegmentReader.get(si); - merger.add(reader); - if ((reader.directory() == this.directory) || // if we own the directory - (reader.directory() == this.ramDirectory)) - segmentsToDelete.addElement(reader); // queue segment for deletion - } - - int mergedDocCount = merger.merge(); - - if (infoStream != null) { - infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)"); - } - - SegmentInfo newSegment = new SegmentInfo(mergedName, mergedDocCount, - directory, false); - if (sourceSegments == ramSegmentInfos) { - sourceSegments.removeAllElements(); - segmentInfos.addElement(newSegment); - } else { - for (int i = end-1; i > minSegment; i--) // remove old infos & add new - sourceSegments.remove(i); - segmentInfos.set(minSegment, newSegment); - } - - // close readers before we attempt to delete now-obsolete segments - merger.closeReaders(); String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); - segmentInfos.write(directory); // commit before deleting + String nextSegmentsFileName = segmentInfos.getNextSegmentFileName(); - deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file - deleter.deleteSegments(segmentsToDelete); // delete now-unused segments + SegmentInfo newSegment = null; + + int mergedDocCount; + + // This is try/finally to make sure merger's readers are closed: + try { + + for (int 
i = minSegment; i < end; i++) { + SegmentInfo si = sourceSegments.info(i); + if (infoStream != null) + infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); + IndexReader reader = SegmentReader.get(si); + merger.add(reader); + if ((reader.directory() == this.directory) || // if we own the directory + (reader.directory() == this.ramDirectory)) + segmentsToDelete.addElement(reader); // queue segment for deletion + } + + SegmentInfos rollback = null; + boolean success = false; + + // This is try/finally to rollback our internal state + // if we hit exception when doing the merge: + try { + + mergedDocCount = merger.merge(); + + if (infoStream != null) { + infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)"); + } + + newSegment = new SegmentInfo(mergedName, mergedDocCount, + directory, false); + + + if (sourceSegments == ramSegmentInfos) { + segmentInfos.addElement(newSegment); + } else { + + if (!inTransaction) { + // Now save the SegmentInfo instances that + // we are replacing: + rollback = (SegmentInfos) segmentInfos.clone(); + } + + for (int i = end-1; i > minSegment; i--) // remove old infos & add new + sourceSegments.remove(i); + + segmentInfos.set(minSegment, newSegment); + } + + if (!inTransaction) { + segmentInfos.write(directory); // commit before deleting + } else { + commitPending = true; + } + + success = true; + + } finally { + + if (success) { + // The non-ram-segments case is already committed + // (above), so all the remains for ram segments case + // is to clear the ram segments: + if (sourceSegments == ramSegmentInfos) { + ramSegmentInfos.removeAllElements(); + } + } else if (!inTransaction) { + + // Must rollback so our state matches index: + + if (sourceSegments == ramSegmentInfos) { + // Simple case: newSegment may or may not have + // been added to the end of our segment infos, + // so just check & remove if so: + if (newSegment != null && + segmentInfos.size() > 0 && + segmentInfos.info(segmentInfos.size()-1) == 
newSegment) { + segmentInfos.remove(segmentInfos.size()-1); + } + } else if (rollback != null) { + // Rollback the individual SegmentInfo + // instances, but keep original SegmentInfos + // instance (so we don't try to write again the + // same segments_N file -- write once): + segmentInfos.clear(); + segmentInfos.addAll(rollback); + } + + // Delete any partially created files: + deleter.deleteFile(nextSegmentsFileName); + deleter.findDeletableFiles(); + deleter.deleteFiles(); + } + } + } finally { + // close readers before we attempt to delete now-obsolete segments + merger.closeReaders(); + } + + if (!inTransaction) { + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + deleter.deleteSegments(segmentsToDelete); // delete now-unused segments + } else { + deleter.addPendingFile(segmentsInfosFileName); // delete old segments_N file + deleter.deleteSegments(segmentsToDelete, protectedSegments); // delete now-unused segments + } if (useCompoundFile) { - Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); - segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); - newSegment.setUseCompoundFile(true); - segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file + segmentsInfosFileName = nextSegmentsFileName; + nextSegmentsFileName = segmentInfos.getNextSegmentFileName(); - deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + Vector filesToDelete; + + boolean success = false; + + try { + + filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); + newSegment.setUseCompoundFile(true); + if (!inTransaction) { + segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file + } + success = true; + + } finally { + if (!success && !inTransaction) { + // Must rollback: + newSegment.setUseCompoundFile(false); + deleter.deleteFile(mergedName + ".cfs"); + deleter.deleteFile(nextSegmentsFileName); + } + 
} + + if (!inTransaction) { + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + } + + // We can delete these segments whether or not we are + // in a transaction because we had just written them + // above so they can't need protection by the + // transaction: deleter.deleteFiles(filesToDelete); // delete now-unused segments } diff --git a/src/java/org/apache/lucene/index/MultiReader.java b/src/java/org/apache/lucene/index/MultiReader.java index 7c53cbe9556..5a9199efe59 100644 --- a/src/java/org/apache/lucene/index/MultiReader.java +++ b/src/java/org/apache/lucene/index/MultiReader.java @@ -230,6 +230,20 @@ public class MultiReader extends IndexReader { subReaders[i].commit(); } + void startCommit() { + super.startCommit(); + for (int i = 0; i < subReaders.length; i++) { + subReaders[i].startCommit(); + } + } + + void rollbackCommit() { + super.rollbackCommit(); + for (int i = 0; i < subReaders.length; i++) { + subReaders[i].rollbackCommit(); + } + } + protected synchronized void doClose() throws IOException { for (int i = 0; i < subReaders.length; i++) subReaders[i].close(); diff --git a/src/java/org/apache/lucene/index/SegmentInfo.java b/src/java/org/apache/lucene/index/SegmentInfo.java index 9bffc1ebd01..e1860c0eb63 100644 --- a/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/src/java/org/apache/lucene/index/SegmentInfo.java @@ -62,6 +62,23 @@ final class SegmentInfo { preLockless = false; } + /** + * Copy everything from src SegmentInfo into our instance. 
+ */ + void reset(SegmentInfo src) { + name = src.name; + docCount = src.docCount; + dir = src.dir; + preLockless = src.preLockless; + delGen = src.delGen; + if (src.normGen == null) { + normGen = null; + } else { + normGen = new long[src.normGen.length]; + System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length); + } + isCompoundFile = src.isCompoundFile; + } /** * Construct a new SegmentInfo instance by reading a @@ -151,6 +168,17 @@ final class SegmentInfo { delGen = -1; } + public Object clone () { + SegmentInfo si = new SegmentInfo(name, docCount, dir); + si.isCompoundFile = isCompoundFile; + si.delGen = delGen; + si.preLockless = preLockless; + if (normGen != null) { + si.normGen = (long[]) normGen.clone(); + } + return si; + } + String getDelFileName() { if (delGen == -1) { // In this case we know there is no deletion filename diff --git a/src/java/org/apache/lucene/index/SegmentInfos.java b/src/java/org/apache/lucene/index/SegmentInfos.java index e9d3e5162f4..bebb83df9af 100644 --- a/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/src/java/org/apache/lucene/index/SegmentInfos.java @@ -50,7 +50,11 @@ public final class SegmentInfos extends Vector { * starting with the current time in milliseconds forces to create unique version numbers. */ private long version = System.currentTimeMillis(); - private long generation = 0; // generation of the "segments_N" file we read + + private long generation = 0; // generation of the "segments_N" for the next commit + private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read + // or wrote; this is normally the same as generation except if + // there was an IOException that had interrupted a commit /** * If non-null, information about loading segments_N files @@ -132,12 +136,28 @@ public final class SegmentInfos extends Vector { } /** - * Get the segment_N filename in use by this segment infos. + * Get the segments_N filename in use by this segment infos. 
*/ public String getCurrentSegmentFileName() { return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - generation); + lastGeneration); + } + + /** + * Get the next segments_N filename that will be written. + */ + public String getNextSegmentFileName() { + long nextGeneration; + + if (generation == -1) { + nextGeneration = 1; + } else { + nextGeneration = generation+1; + } + return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + nextGeneration); } /** @@ -158,6 +178,7 @@ public final class SegmentInfos extends Vector { generation = Long.parseLong(segmentFileName.substring(1+IndexFileNames.SEGMENTS.length()), Character.MAX_RADIX); } + lastGeneration = generation; try { int format = input.readInt(); @@ -199,7 +220,7 @@ public final class SegmentInfos extends Vector { */ public final void read(Directory directory) throws IOException { - generation = -1; + generation = lastGeneration = -1; new FindSegmentsFile(directory) { @@ -212,6 +233,8 @@ public final class SegmentInfos extends Vector { public final void write(Directory directory) throws IOException { + String segmentFileName = getNextSegmentFileName(); + // Always advance the generation on write: if (generation == -1) { generation = 1; @@ -219,7 +242,6 @@ public final class SegmentInfos extends Vector { generation++; } - String segmentFileName = getCurrentSegmentFileName(); IndexOutput output = directory.createOutput(segmentFileName); try { @@ -229,8 +251,7 @@ public final class SegmentInfos extends Vector { output.writeInt(counter); // write counter output.writeInt(size()); // write infos for (int i = 0; i < size(); i++) { - SegmentInfo si = info(i); - si.write(output); + info(i).write(output); } } finally { @@ -247,6 +268,21 @@ public final class SegmentInfos extends Vector { // It's OK if we fail to write this file since it's // used only as one of the retry fallbacks. 
} + + lastGeneration = generation; + } + + /** + * Returns a copy of this instance, also copying each + * SegmentInfo. + */ + + public Object clone() { + SegmentInfos sis = (SegmentInfos) super.clone(); + for(int i=0;i