From 0b90b5e23d904a669c92a692765d1acd77cf1971 Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Mon, 20 Oct 2008 11:33:13 +0000
Subject: [PATCH] LUCENE-1382: add opaque userData String passed to
IndexWriter.commit, recorded into the segments file
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@706240 13f79535-47bb-0310-9956-ffa450edef68
---
CHANGES.txt | 20 ++++---
.../lucene/index/DirectoryIndexReader.java | 10 ++++
.../org/apache/lucene/index/IndexCommit.java | 12 +++-
.../apache/lucene/index/IndexFileDeleter.java | 6 ++
.../org/apache/lucene/index/IndexReader.java | 30 ++++++++++
.../org/apache/lucene/index/IndexWriter.java | 60 +++++++++++++++----
.../org/apache/lucene/index/SegmentInfos.java | 40 ++++++++++++-
.../lucene/index/SnapshotDeletionPolicy.java | 3 +
.../content/xdocs/fileformats.xml | 19 +++++-
.../apache/lucene/index/TestIndexWriter.java | 38 ++++++++++++
10 files changed, 211 insertions(+), 27 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 3d30b83419b..9b51789dcad 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,13 +20,19 @@ Bug fixes
New features
-1. LUCENE-1411: Added expert API to open an IndexWriter on a prior
- commit, obtained from IndexReader.listCommits. This makes it
- possible to rollback changes to an index even after you've closed
- the IndexWriter that made the changes, assuming you are using an
- IndexDeletionPolicy that keeps past commits around. This is useful
- when building transactional support on top of Lucene. (Mike
- McCandless)
+ 1. LUCENE-1411: Added expert API to open an IndexWriter on a prior
+ commit, obtained from IndexReader.listCommits. This makes it
+ possible to rollback changes to an index even after you've closed
+ the IndexWriter that made the changes, assuming you are using an
+ IndexDeletionPolicy that keeps past commits around. This is useful
+ when building transactional support on top of Lucene. (Mike
+ McCandless)
+
+ 2. LUCENE-1382: Add an optional arbitrary String "commitUserData" to
+ IndexWriter.commit(), which is stored in the segments file and is
+ then retrievable via IndexReader.getCommitUserData instance and
+ static methods. (Shalin Shekhar Mangar via Mike McCandless)
+
Optimizations
diff --git a/src/java/org/apache/lucene/index/DirectoryIndexReader.java b/src/java/org/apache/lucene/index/DirectoryIndexReader.java
index 084e5dd3b7c..535efc0122f 100644
--- a/src/java/org/apache/lucene/index/DirectoryIndexReader.java
+++ b/src/java/org/apache/lucene/index/DirectoryIndexReader.java
@@ -169,6 +169,11 @@ abstract class DirectoryIndexReader extends IndexReader {
return segmentInfos.getVersion();
}
+ public String getCommitUserData() {
+ ensureOpen();
+ return segmentInfos.getUserData();
+ }
+
/**
* Check whether this IndexReader is still using the
* current (i.e., most recently committed) version of the
@@ -367,11 +372,13 @@ abstract class DirectoryIndexReader extends IndexReader {
long generation;
long version;
final boolean isOptimized;
+ final String userData;
ReaderCommit(SegmentInfos infos, Directory dir) throws IOException {
segmentsFileName = infos.getCurrentSegmentFileName();
this.dir = dir;
final int size = infos.size();
+ userData = infos.getUserData();
files = new ArrayList(size);
files.add(segmentsFileName);
for(int i=0;iFor IndexReader implementations that use
* TermInfosReader to read terms, this sets the
* indexDivisor to subsample the number of indexed terms
diff --git a/src/java/org/apache/lucene/index/IndexWriter.java b/src/java/org/apache/lucene/index/IndexWriter.java
index 4018e4ae90e..a88a9d70c0e 100644
--- a/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/src/java/org/apache/lucene/index/IndexWriter.java
@@ -3400,8 +3400,16 @@ public class IndexWriter {
flush(true, false, true);
}
- /** Expert: prepare for commit. This does the first
- * phase of 2-phase commit. You can only call this when
+ /** Expert: prepare for commit.
+ * @see #prepareCommit(String) */
+ public final void prepareCommit() throws CorruptIndexException, IOException {
+ ensureOpen();
+ prepareCommit(null);
+ }
+
+ /**
Expert: prepare for commit, specifying
+ * commitUserData String. This does the first phase of
+ * 2-phase commit. You can only call this when
* autoCommit is false. This method does all steps
* necessary to commit changes since this writer was
* opened: flushes pending added and deleted docs, syncs
@@ -3410,17 +3418,28 @@ public class IndexWriter {
* #commit()} to finish the commit, or {@link
* #rollback()} to revert the commit and undo all changes
* done since the writer was opened.
+ *
+ * You can also just call {@link #commit(String)} directly
+ * without prepareCommit first in which case that method
+ * will internally call prepareCommit.
*
- * You can also just call {@link #commit()} directly
- * without prepareCommit first in which case that method
- * will internally call prepareCommit.
+ * @param commitUserData Opaque String that's recorded
+ * into the segments file in the index, and retrievable
+ * by {@link IndexReader#getCommitUserData}. Note that
+ * when IndexWriter commits itself, for example if open
+ * with autoCommit=true, or, during {@link #close}, the
+ * commitUserData is unchanged (just carried over from
+ * the prior commit). If this is null then the previous
+ * commitUserData is kept. Also, the commitUserData will
+ * only "stick" if there are actually changes in the
+ * index to commit. Therefore it's best to use this
+ * feature only when autoCommit is false.
*/
- public final void prepareCommit() throws CorruptIndexException, IOException {
- ensureOpen();
- prepareCommit(false);
+ public final void prepareCommit(String commitUserData) throws CorruptIndexException, IOException {
+ prepareCommit(commitUserData, false);
}
- private final void prepareCommit(boolean internal) throws CorruptIndexException, IOException {
+ private final void prepareCommit(String commitUserData, boolean internal) throws CorruptIndexException, IOException {
if (hitOOM)
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
@@ -3435,11 +3454,11 @@ public class IndexWriter {
flush(true, true, true);
- startCommit(0);
+ startCommit(0, commitUserData);
}
private void commit(long sizeInBytes) throws IOException {
- startCommit(sizeInBytes);
+ startCommit(sizeInBytes, null);
finishCommit();
}
@@ -3482,7 +3501,17 @@ public class IndexWriter {
* @see #prepareCommit
*/
+ /** Commits all changes to the index.
+ * @see #commit(String) */
public final void commit() throws CorruptIndexException, IOException {
+ commit(null);
+ }
+
+ /** Commits all changes to the index, specifying a
+ * commitUserData String. This just calls {@link
+ * #prepareCommit(String)} (if you didn't already call
+ * it) and then {@link #finishCommit}. */
+ public final void commit(String commitUserData) throws CorruptIndexException, IOException {
ensureOpen();
@@ -3494,7 +3523,7 @@ public class IndexWriter {
if (autoCommit || pendingCommit == null) {
message("commit: now prepare");
- prepareCommit(true);
+ prepareCommit(commitUserData, true);
} else
message("commit: already prepared");
@@ -3513,6 +3542,7 @@ public class IndexWriter {
message("commit: wrote segments file \"" + pendingCommit.getCurrentSegmentFileName() + "\"");
lastCommitChangeCount = pendingCommitChangeCount;
segmentInfos.updateGeneration(pendingCommit);
+ segmentInfos.setUserData(pendingCommit.getUserData());
setRollbackSegmentInfos(pendingCommit);
deleter.checkpoint(pendingCommit, true);
} finally {
@@ -4600,7 +4630,7 @@ public class IndexWriter {
* if it wasn't already. If that succeeds, then we
* prepare a new segments_N file but do not fully commit
* it. */
- private void startCommit(long sizeInBytes) throws IOException {
+ private void startCommit(long sizeInBytes, String commitUserData) throws IOException {
assert testPoint("startStartCommit");
@@ -4655,6 +4685,10 @@ public class IndexWriter {
message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount);
toSync = (SegmentInfos) segmentInfos.clone();
+
+ if (commitUserData != null)
+ toSync.setUserData(commitUserData);
+
deleter.incRef(toSync, false);
myChangeCount = changeCount;
} finally {
diff --git a/src/java/org/apache/lucene/index/SegmentInfos.java b/src/java/org/apache/lucene/index/SegmentInfos.java
index 52870e4ce63..588e9269e61 100644
--- a/src/java/org/apache/lucene/index/SegmentInfos.java
+++ b/src/java/org/apache/lucene/index/SegmentInfos.java
@@ -69,8 +69,11 @@ final class SegmentInfos extends Vector {
* omitTf==false) */
public static final int FORMAT_HAS_PROX = -7;
+ /** This format adds optional commit userData (String) storage. */
+ public static final int FORMAT_USER_DATA = -8;
+
/* This must always point to the most recent file format. */
- static final int CURRENT_FORMAT = FORMAT_HAS_PROX;
+ static final int CURRENT_FORMAT = FORMAT_USER_DATA;
public int counter = 0; // used to name new segments
/**
@@ -84,6 +87,8 @@ final class SegmentInfos extends Vector {
// or wrote; this is normally the same as generation except if
// there was an IOException that had interrupted a commit
+ private String userData; // Opaque String that user can specify during IndexWriter.commit
+
/**
* If non-null, information about loading segments_N files
* will be printed here. @see #setInfoStream.
@@ -241,6 +246,13 @@ final class SegmentInfos extends Vector {
version = input.readLong(); // read version
}
+ if (format <= FORMAT_USER_DATA) {
+ if (0 == input.readByte())
+ userData = null;
+ else
+ userData = input.readString();
+ }
+
if (format <= FORMAT_CHECKSUM) {
final long checksumNow = input.getChecksum();
final long checksumThen = input.readLong();
@@ -306,6 +318,12 @@ final class SegmentInfos extends Vector {
for (int i = 0; i < size(); i++) {
info(i).write(output);
}
+ if (userData == null)
+ output.writeByte((byte) 0);
+ else {
+ output.writeByte((byte) 1);
+ output.writeString(userData);
+ }
output.prepareCommit();
success = true;
pendingOutput = output;
@@ -394,6 +412,18 @@ final class SegmentInfos extends Vector {
}.run()).longValue();
}
+ /**
+ * Returns userData from latest segments file
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ */
+ public static String readCurrentUserData(Directory directory)
+ throws CorruptIndexException, IOException {
+ SegmentInfos sis = new SegmentInfos();
+ sis.read(directory);
+ return sis.getUserData();
+ }
+
/** If non-null, information about retries when loading
* the segments file will be printed to this.
*/
@@ -841,6 +871,14 @@ final class SegmentInfos extends Vector {
return buffer.toString();
}
+ public String getUserData() {
+ return userData;
+ }
+
+ public void setUserData(String data) {
+ userData = data;
+ }
+
/** Replaces all segments in this instance, but keeps
* generation, version, counter so that future commits
* remain write once.
diff --git a/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java b/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java
index b094adb05e2..064baf1a514 100644
--- a/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java
+++ b/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java
@@ -118,6 +118,9 @@ public class SnapshotDeletionPolicy implements IndexDeletionPolicy {
public long getGeneration() {
return cp.getGeneration();
}
+ public String getUserData() throws IOException {
+ return cp.getUserData();
+ }
}
private List wrapCommits(List commits) {
diff --git a/src/site/src/documentation/content/xdocs/fileformats.xml b/src/site/src/documentation/content/xdocs/fileformats.xml
index e26baa2b917..ead482cac39 100644
--- a/src/site/src/documentation/content/xdocs/fileformats.xml
+++ b/src/site/src/documentation/content/xdocs/fileformats.xml
@@ -830,6 +830,12 @@
NormGenNumField,
IsCompoundFile, DeletionCount, HasProx>SegCount, Checksum
+
+ 2.9 and above:
+ Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
+ NormGenNumField,
+ IsCompoundFile, DeletionCount, HasProx>SegCount, HasUserData, CommitUserData?, Checksum
+
Format, NameCounter, SegCount, SegSize, NumField,
@@ -841,16 +847,16 @@
- SegName, DocStoreSegment --> String
+ SegName, DocStoreSegment, CommitUserData --> String
IsCompoundFile, HasSingleNormFile,
- DocStoreIsCompoundFile, HasProx --> Int8
+ DocStoreIsCompoundFile, HasProx, HasUserData --> Int8
- Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3 and -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4.
+ Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3, -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4, and -8 (SegmentInfos.FORMAT_USER_DATA) as of Lucene 2.9.
@@ -951,6 +957,13 @@
omitTf set to false; else, it's 0.
+
+ If HasUserData is 1, then the string
+ CommitUserData is non-null and is stored. This is
+ a string previously passed to IndexWriter's commit
+ or prepareCommit method.
+
+
Lock File
diff --git a/src/test/org/apache/lucene/index/TestIndexWriter.java b/src/test/org/apache/lucene/index/TestIndexWriter.java
index 0272503e3d4..7d08ba673f4 100644
--- a/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -4116,6 +4116,44 @@ public class TestIndexWriter extends LuceneTestCase
}
}
+ // LUCENE-1382
+ public void testCommitUserData() throws IOException {
+ Directory dir = new MockRAMDirectory();
+ IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
+ w.setMaxBufferedDocs(2);
+ for(int j=0;j<17;j++)
+ addDoc(w);
+ w.close();
+
+ assertEquals(null, IndexReader.getCommitUserData(dir));
+
+ IndexReader r = IndexReader.open(dir);
+ // commit(String) never called for this index
+ assertEquals(null, r.getCommitUserData());
+ r.close();
+
+ w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
+ w.setMaxBufferedDocs(2);
+ for(int j=0;j<17;j++)
+ addDoc(w);
+ w.commit("test1");
+ w.close();
+
+ assertEquals("test1", IndexReader.getCommitUserData(dir));
+
+ r = IndexReader.open(dir);
+ assertEquals("test1", r.getCommitUserData());
+ r.close();
+
+ w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
+ w.optimize();
+ w.close();
+
+ assertEquals("test1", IndexReader.getCommitUserData(dir));
+
+ dir.close();
+ }
+
public void testOptimizeExceptions() throws IOException {
RAMDirectory startDir = new MockRAMDirectory();
IndexWriter w = new IndexWriter(startDir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);