diff --git a/CHANGES.txt b/CHANGES.txt index 1bd3a4e1eb2..3d30b83419b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -20,6 +20,14 @@ Bug fixes New features +1. LUCENE-1411: Added expert API to open an IndexWriter on a prior + commit, obtained from IndexReader.listCommits. This makes it + possible to rollback changes to an index even after you've closed + the IndexWriter that made the changes, assuming you are using an + IndexDeletionPolicy that keeps past commits around. This is useful + when building transactional support on top of Lucene. (Mike + McCandless) + Optimizations Documentation diff --git a/src/java/org/apache/lucene/index/DirectoryIndexReader.java b/src/java/org/apache/lucene/index/DirectoryIndexReader.java index 1d3baf13fad..084e5dd3b7c 100644 --- a/src/java/org/apache/lucene/index/DirectoryIndexReader.java +++ b/src/java/org/apache/lucene/index/DirectoryIndexReader.java @@ -266,6 +266,7 @@ abstract class DirectoryIndexReader extends IndexReader { // Have the deleter remove any now unreferenced // files due to this commit: deleter.checkpoint(segmentInfos, true); + deleter.close(); if (writeLock != null) { writeLock.release(); // release write lock diff --git a/src/java/org/apache/lucene/index/IndexFileDeleter.java b/src/java/org/apache/lucene/index/IndexFileDeleter.java index d5edf06b91c..e526ad9e39a 100644 --- a/src/java/org/apache/lucene/index/IndexFileDeleter.java +++ b/src/java/org/apache/lucene/index/IndexFileDeleter.java @@ -122,10 +122,8 @@ final class IndexFileDeleter { /** * Initialize the deleter: find all previous commits in * the Directory, incref the files they reference, call - * the policy to let it delete commits. The incoming - * segmentInfos must have been loaded from a commit point - * and not yet modified. This will remove any files not - * referenced by any of the commits. + * the policy to let it delete commits. This will remove + * any files not referenced by any of the commits. * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ @@ -242,12 +240,9 @@ final class IndexFileDeleter { // startup: policy.onInit(commits); - // It's OK for the onInit to remove the current commit - // point; we just have to checkpoint our in-memory - // SegmentInfos to protect those files that it uses: - if (currentCommitPoint.deleted) { - checkpoint(segmentInfos, false); - } + // Always protect the incoming segmentInfos since + // sometime it may not be the most recent commit + checkpoint(segmentInfos, false); deleteCommits(); } @@ -341,6 +336,14 @@ final class IndexFileDeleter { } public void close() throws IOException { + // DecRef old files from the last checkpoint, if any: + int size = lastFiles.size(); + if (size > 0) { + for(int i=0;ifalse to append to the existing * index * @param deletionPolicy see above + * @param mfl whether or not to limit field lengths * @param indexingChain the {@link DocConsumer} chain to be used to * process documents - * @param mfl whether or not to limit field lengths + * @param commit which commit to open * @throws CorruptIndexException if the index is corrupt * @throws LockObtainFailedException if another writer * has this index open (write.lock could not @@ -1027,9 +1028,9 @@ public class IndexWriter { * false or if there is any other low-level * IO error */ - IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain) + IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain, IndexCommit commit) throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, create, false, deletionPolicy, false, mfl.getLimit(), indexingChain); + init(d, a, create, false, deletionPolicy, false, mfl.getLimit(), indexingChain, commit); } /** @@ -1062,33 +1063,63 @@ public class IndexWriter { */ public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, create, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH); + init(d, a, create, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH, null, null); + } + + /** + * Expert: constructs an IndexWriter on specific commit + * point, with a custom {@link IndexDeletionPolicy}, for + * the index in d. Text will be analyzed + * with a. + * + *

This is only meaningful if you've used a {@link + * IndexDeletionPolicy} in that past that keeps more than + * just the last commit. + * + *

This operation is similar to {@link #rollback()}, + * except that method can only rollback what's been done + * with the current instance of IndexWriter since its last + * commit, whereas this method can rollback to an + * arbitrary commit point from the past, assuming the + * {@link IndexDeletionPolicy} has preserved past + * commits. + * + *

NOTE: autoCommit (see above) is set to false with this + * constructor. + * + * @param d the index directory + * @param a the analyzer to use + * @param deletionPolicy see above + * @param mfl whether or not to limit field lengths + * @param commit which commit to open + * @throws CorruptIndexException if the index is corrupt + * @throws LockObtainFailedException if another writer + * has this index open (write.lock could not + * be obtained) + * @throws IOException if the directory cannot be read/written to, or + * if it does not exist and create is + * false or if there is any other low-level + * IO error + */ + public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexCommit commit) + throws CorruptIndexException, LockObtainFailedException, IOException { + init(d, a, false, false, deletionPolicy, false, mfl.getLimit(), null, commit); } private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, - boolean autoCommit, int maxFieldLength) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, closeDir, deletionPolicy, autoCommit, maxFieldLength, DocumentsWriter.DefaultIndexingChain); - } - - private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, - boolean autoCommit, int maxFieldLength, IndexingChain indexingChain) + boolean autoCommit, int maxFieldLength, IndexingChain indexingChain, IndexCommit commit) throws CorruptIndexException, LockObtainFailedException, IOException { if (IndexReader.indexExists(d)) { - init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain); + init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain, commit); } else { - init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain); + init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain, commit); } } private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, - IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, create, closeDir, deletionPolicy, autoCommit, maxFieldLength, DocumentsWriter.DefaultIndexingChain); - } - private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, - IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength, - IndexingChain indexingChain) + IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength, + IndexingChain indexingChain, IndexCommit commit) throws CorruptIndexException, LockObtainFailedException, IOException { this.closeDir = closeDir; directory = d; @@ -1096,6 +1127,9 @@ public class IndexWriter { setMessageID(defaultInfoStream); this.maxFieldLength = maxFieldLength; + if (indexingChain == null) + indexingChain = DocumentsWriter.DefaultIndexingChain; + if (create) { // Clear the write lock in case it's leftover: directory.clearLock(WRITE_LOCK_NAME); @@ -1122,6 +1156,21 @@ public class IndexWriter { } else { segmentInfos.read(directory); + if (commit != null) { + // Swap out all segments, but, keep metadata in + // SegmentInfos, like version & generation, to + // preserve write-once. This is important if + // readers are open against the future commit + // points. + if (commit.getDirectory() != directory) + throw new IllegalArgumentException("IndexCommit's directory doesn't match my directory"); + SegmentInfos oldInfos = new SegmentInfos(); + oldInfos.read(directory, commit.getSegmentsFileName()); + segmentInfos.replace(oldInfos); + changeCount++; + message("init: loaded commit \"" + commit.getSegmentsFileName() + "\""); + } + // We assume that this segments_N was previously // properly sync'd: for(int i=0;i lastCommit.getGeneration()) + lastCommit = commit; + } + assertTrue(lastCommit != null); + + // Now add 1 doc and optimize + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED); + addDoc(writer); + assertEquals(11, writer.numDocs()); + writer.optimize(); + writer.close(); + + assertEquals(7, IndexReader.listCommits(dir).size()); + + // Now open writer on the commit just before optimize: + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit); + assertEquals(10, writer.numDocs()); + + // Should undo our rollback: + writer.rollback(); + + IndexReader r = IndexReader.open(dir); + // Still optimized, still 11 docs + assertTrue(r.isOptimized()); + assertEquals(11, r.numDocs()); + r.close(); + + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit); + assertEquals(10, writer.numDocs()); + // Commits the rollback: + writer.close(); + + // Now 8 because we made another commit + assertEquals(8, IndexReader.listCommits(dir).size()); + + r = IndexReader.open(dir); + // Not optimized because we rolled it back, and now only + // 10 docs + assertTrue(!r.isOptimized()); + assertEquals(10, r.numDocs()); + r.close(); + + // Reoptimize + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED); + writer.optimize(); + writer.close(); + + r = IndexReader.open(dir); + assertTrue(r.isOptimized()); + assertEquals(10, r.numDocs()); + r.close(); + + // Now open writer on the commit just before optimize, + // but this time keeping only the last commit: + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), new KeepOnlyLastCommitDeletionPolicy(), IndexWriter.MaxFieldLength.LIMITED, lastCommit); + assertEquals(10, writer.numDocs()); + + // Reader still sees optimized index, because writer + // opened on the prior commit has not yet committed: + r = IndexReader.open(dir); + assertTrue(r.isOptimized()); + assertEquals(10, r.numDocs()); + r.close(); + + writer.close(); + + // Now reader sees unoptimized index: + r = IndexReader.open(dir); + assertTrue(!r.isOptimized()); + assertEquals(10, r.numDocs()); + r.close(); + + dir.close(); + } + + /* Test keeping NO commit points. This is a viable and * useful case eg where you want to build a big index with * autoCommit false and you know there are no readers.