From 0b90b5e23d904a669c92a692765d1acd77cf1971 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 20 Oct 2008 11:33:13 +0000 Subject: [PATCH] LUCENE-1382: add opaque userData String passed to IndexWriter.commit, recorded into the segments file git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@706240 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 20 ++++--- .../lucene/index/DirectoryIndexReader.java | 10 ++++ .../org/apache/lucene/index/IndexCommit.java | 12 +++- .../apache/lucene/index/IndexFileDeleter.java | 6 ++ .../org/apache/lucene/index/IndexReader.java | 30 ++++++++++ .../org/apache/lucene/index/IndexWriter.java | 60 +++++++++++++++---- .../org/apache/lucene/index/SegmentInfos.java | 40 ++++++++++++- .../lucene/index/SnapshotDeletionPolicy.java | 3 + .../content/xdocs/fileformats.xml | 19 +++++- .../apache/lucene/index/TestIndexWriter.java | 38 ++++++++++++ 10 files changed, 211 insertions(+), 27 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 3d30b83419b..9b51789dcad 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -20,13 +20,19 @@ Bug fixes New features -1. LUCENE-1411: Added expert API to open an IndexWriter on a prior - commit, obtained from IndexReader.listCommits. This makes it - possible to rollback changes to an index even after you've closed - the IndexWriter that made the changes, assuming you are using an - IndexDeletionPolicy that keeps past commits around. This is useful - when building transactional support on top of Lucene. (Mike - McCandless) + 1. LUCENE-1411: Added expert API to open an IndexWriter on a prior + commit, obtained from IndexReader.listCommits. This makes it + possible to rollback changes to an index even after you've closed + the IndexWriter that made the changes, assuming you are using an + IndexDeletionPolicy that keeps past commits around. This is useful + when building transactional support on top of Lucene. (Mike + McCandless) + + 2. 
LUCENE-1382: Add an optional arbitrary String "commitUserData" to + IndexWriter.commit(), which is stored in the segments file and is + then retrievable via IndexReader.getCommitUserData instance and + static methods. (Shalin Shekhar Mangar via Mike McCandless) + Optimizations diff --git a/src/java/org/apache/lucene/index/DirectoryIndexReader.java b/src/java/org/apache/lucene/index/DirectoryIndexReader.java index 084e5dd3b7c..535efc0122f 100644 --- a/src/java/org/apache/lucene/index/DirectoryIndexReader.java +++ b/src/java/org/apache/lucene/index/DirectoryIndexReader.java @@ -169,6 +169,11 @@ abstract class DirectoryIndexReader extends IndexReader { return segmentInfos.getVersion(); } + public String getCommitUserData() { + ensureOpen(); + return segmentInfos.getUserData(); + } + /** * Check whether this IndexReader is still using the * current (i.e., most recently committed) version of the @@ -367,11 +372,13 @@ abstract class DirectoryIndexReader extends IndexReader { long generation; long version; final boolean isOptimized; + final String userData; ReaderCommit(SegmentInfos infos, Directory dir) throws IOException { segmentsFileName = infos.getCurrentSegmentFileName(); this.dir = dir; final int size = infos.size(); + userData = infos.getUserData(); files = new ArrayList(size); files.add(segmentsFileName); for(int i=0;iFor IndexReader implementations that use * TermInfosReader to read terms, this sets the * indexDivisor to subsample the number of indexed terms diff --git a/src/java/org/apache/lucene/index/IndexWriter.java b/src/java/org/apache/lucene/index/IndexWriter.java index 4018e4ae90e..a88a9d70c0e 100644 --- a/src/java/org/apache/lucene/index/IndexWriter.java +++ b/src/java/org/apache/lucene/index/IndexWriter.java @@ -3400,8 +3400,16 @@ public class IndexWriter { flush(true, false, true); } - /**

<p>Expert: prepare for commit. This does the first - * phase of 2-phase commit. You can only call this when + /** Expert: prepare for commit. + * @see #prepareCommit(String) */ + public final void prepareCommit() throws CorruptIndexException, IOException { + ensureOpen(); + prepareCommit(null); + } + + /**

<p>Expert: prepare for commit, specifying + * commitUserData String. This does the first phase of + * 2-phase commit. You can only call this when * autoCommit is false. This method does all steps * necessary to commit changes since this writer was * opened: flushes pending added and deleted docs, syncs @@ -3410,17 +3418,28 @@ public class IndexWriter { * #commit()} to finish the commit, or {@link * #rollback()} to revert the commit and undo all changes * done since the writer was opened.</p>

+ * + * You can also just call {@link #commit(String)} directly + * without prepareCommit first in which case that method + * will internally call prepareCommit. * - * You can also just call {@link #commit()} directly - * without prepareCommit first in which case that method - * will internally call prepareCommit. + * @param commitUserData Opaque String that's recorded + * into the segments file in the index, and retrievable + * by {@link IndexReader#getCommitUserData}. Note that + * when IndexWriter commits itself, for example if open + * with autoCommit=true, or, during {@link #close}, the + * commitUserData is unchanged (just carried over from + * the prior commit). If this is null then the previous + * commitUserData is kept. Also, the commitUserData will + * only "stick" if there are actually changes in the + * index to commit. Therefore it's best to use this + * feature only when autoCommit is false. */ - public final void prepareCommit() throws CorruptIndexException, IOException { - ensureOpen(); - prepareCommit(false); + public final void prepareCommit(String commitUserData) throws CorruptIndexException, IOException { + prepareCommit(commitUserData, false); } - private final void prepareCommit(boolean internal) throws CorruptIndexException, IOException { + private final void prepareCommit(String commitUserData, boolean internal) throws CorruptIndexException, IOException { if (hitOOM) throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit"); @@ -3435,11 +3454,11 @@ public class IndexWriter { flush(true, true, true); - startCommit(0); + startCommit(0, commitUserData); } private void commit(long sizeInBytes) throws IOException { - startCommit(sizeInBytes); + startCommit(sizeInBytes, null); finishCommit(); } @@ -3482,7 +3501,17 @@ public class IndexWriter { * @see #prepareCommit */ + /** Commits all changes to the index. 
+ * @see #commit(String) */ public final void commit() throws CorruptIndexException, IOException { + commit(null); + } + + /** Commits all changes to the index, specifying a + * commitUserData String. This just calls {@link + * #prepareCommit(String)} (if you didn't already call + * it) and then {@link #finishCommit}. */ + public final void commit(String commitUserData) throws CorruptIndexException, IOException { ensureOpen(); @@ -3494,7 +3523,7 @@ public class IndexWriter { if (autoCommit || pendingCommit == null) { message("commit: now prepare"); - prepareCommit(true); + prepareCommit(commitUserData, true); } else message("commit: already prepared"); @@ -3513,6 +3542,7 @@ public class IndexWriter { message("commit: wrote segments file \"" + pendingCommit.getCurrentSegmentFileName() + "\""); lastCommitChangeCount = pendingCommitChangeCount; segmentInfos.updateGeneration(pendingCommit); + segmentInfos.setUserData(pendingCommit.getUserData()); setRollbackSegmentInfos(pendingCommit); deleter.checkpoint(pendingCommit, true); } finally { @@ -4600,7 +4630,7 @@ public class IndexWriter { * if it wasn't already. If that succeeds, then we * prepare a new segments_N file but do not fully commit * it. 
*/ - private void startCommit(long sizeInBytes) throws IOException { + private void startCommit(long sizeInBytes, String commitUserData) throws IOException { assert testPoint("startStartCommit"); @@ -4655,6 +4685,10 @@ public class IndexWriter { message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount); toSync = (SegmentInfos) segmentInfos.clone(); + + if (commitUserData != null) + toSync.setUserData(commitUserData); + deleter.incRef(toSync, false); myChangeCount = changeCount; } finally { diff --git a/src/java/org/apache/lucene/index/SegmentInfos.java b/src/java/org/apache/lucene/index/SegmentInfos.java index 52870e4ce63..588e9269e61 100644 --- a/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/src/java/org/apache/lucene/index/SegmentInfos.java @@ -69,8 +69,11 @@ final class SegmentInfos extends Vector { * omitTf==false) */ public static final int FORMAT_HAS_PROX = -7; + /** This format adds optional commit userData (String) storage. */ + public static final int FORMAT_USER_DATA = -8; + /* This must always point to the most recent file format. */ - static final int CURRENT_FORMAT = FORMAT_HAS_PROX; + static final int CURRENT_FORMAT = FORMAT_USER_DATA; public int counter = 0; // used to name new segments /** @@ -84,6 +87,8 @@ final class SegmentInfos extends Vector { // or wrote; this is normally the same as generation except if // there was an IOException that had interrupted a commit + private String userData; // Opaque String that user can specify during IndexWriter.commit + /** * If non-null, information about loading segments_N files * will be printed here. @see #setInfoStream. 
@@ -241,6 +246,13 @@ final class SegmentInfos extends Vector { version = input.readLong(); // read version } + if (format <= FORMAT_USER_DATA) { + if (0 == input.readByte()) + userData = null; + else + userData = input.readString(); + } + if (format <= FORMAT_CHECKSUM) { final long checksumNow = input.getChecksum(); final long checksumThen = input.readLong(); @@ -306,6 +318,12 @@ final class SegmentInfos extends Vector { for (int i = 0; i < size(); i++) { info(i).write(output); } + if (userData == null) + output.writeByte((byte) 0); + else { + output.writeByte((byte) 1); + output.writeString(userData); + } output.prepareCommit(); success = true; pendingOutput = output; @@ -394,6 +412,18 @@ final class SegmentInfos extends Vector { }.run()).longValue(); } + /** + * Returns userData from latest segments file + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public static String readCurrentUserData(Directory directory) + throws CorruptIndexException, IOException { + SegmentInfos sis = new SegmentInfos(); + sis.read(directory); + return sis.getUserData(); + } + /** If non-null, information about retries when loading * the segments file will be printed to this. */ @@ -841,6 +871,14 @@ final class SegmentInfos extends Vector { return buffer.toString(); } + public String getUserData() { + return userData; + } + + public void setUserData(String data) { + userData = data; + } + /** Replaces all segments in this instance, but keeps * generation, version, counter so that future commits * remain write once. 
diff --git a/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java b/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java index b094adb05e2..064baf1a514 100644 --- a/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java +++ b/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java @@ -118,6 +118,9 @@ public class SnapshotDeletionPolicy implements IndexDeletionPolicy { public long getGeneration() { return cp.getGeneration(); } + public String getUserData() throws IOException { + return cp.getUserData(); + } } private List wrapCommits(List commits) { diff --git a/src/site/src/documentation/content/xdocs/fileformats.xml b/src/site/src/documentation/content/xdocs/fileformats.xml index e26baa2b917..ead482cac39 100644 --- a/src/site/src/documentation/content/xdocs/fileformats.xml +++ b/src/site/src/documentation/content/xdocs/fileformats.xml @@ -830,6 +830,12 @@ NormGenNumField, IsCompoundFile, DeletionCount, HasProx>SegCount, Checksum

+

+ 2.9 and above: + Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField, + NormGen<sup>NumField</sup>, + IsCompoundFile, DeletionCount, HasProx><sup>SegCount</sup>, HasUserData, CommitUserData?, Checksum +

Format, NameCounter, SegCount, SegSize, NumField, @@ -841,16 +847,16 @@

- SegName, DocStoreSegment --> String + SegName, DocStoreSegment, CommitUserData --> String

IsCompoundFile, HasSingleNormFile, - DocStoreIsCompoundFile, HasProx --> Int8 + DocStoreIsCompoundFile, HasProx, HasUserData --> Int8

- Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3 and -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4. + Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3, -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4, and -8 (SegmentInfos.FORMAT_USER_DATA) as of Lucene 2.9.

@@ -951,6 +957,13 @@ omitTf set to false; else, it's 0.

+

+ If HasUserData is 1, then the string + CommitUserData is non-null and is stored. This is + a string previously passed to IndexWriter's commit + or prepareCommit method. +

+
Lock File diff --git a/src/test/org/apache/lucene/index/TestIndexWriter.java b/src/test/org/apache/lucene/index/TestIndexWriter.java index 0272503e3d4..7d08ba673f4 100644 --- a/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -4116,6 +4116,44 @@ public class TestIndexWriter extends LuceneTestCase } } + // LUCENE-1382 + public void testCommitUserData() throws IOException { + Directory dir = new MockRAMDirectory(); + IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); + w.setMaxBufferedDocs(2); + for(int j=0;j<17;j++) + addDoc(w); + w.close(); + + assertEquals(null, IndexReader.getCommitUserData(dir)); + + IndexReader r = IndexReader.open(dir); + // commit(String) never called for this index + assertEquals(null, r.getCommitUserData()); + r.close(); + + w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); + w.setMaxBufferedDocs(2); + for(int j=0;j<17;j++) + addDoc(w); + w.commit("test1"); + w.close(); + + assertEquals("test1", IndexReader.getCommitUserData(dir)); + + r = IndexReader.open(dir); + assertEquals("test1", r.getCommitUserData()); + r.close(); + + w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); + w.optimize(); + w.close(); + + assertEquals("test1", IndexReader.getCommitUserData(dir)); + + dir.close(); + } + public void testOptimizeExceptions() throws IOException { RAMDirectory startDir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(startDir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);