LUCENE-1382: add opaque userData String passed to IndexWriter.commit, recorded into the segments file

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@706240 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2008-10-20 11:33:13 +00:00
parent 0250cccacf
commit 0b90b5e23d
10 changed files with 211 additions and 27 deletions

View File

@ -20,13 +20,19 @@ Bug fixes
New features
1. LUCENE-1411: Added expert API to open an IndexWriter on a prior
commit, obtained from IndexReader.listCommits. This makes it
possible to rollback changes to an index even after you've closed
the IndexWriter that made the changes, assuming you are using an
IndexDeletionPolicy that keeps past commits around. This is useful
when building transactional support on top of Lucene. (Mike
McCandless)
1. LUCENE-1411: Added expert API to open an IndexWriter on a prior
commit, obtained from IndexReader.listCommits. This makes it
possible to rollback changes to an index even after you've closed
the IndexWriter that made the changes, assuming you are using an
IndexDeletionPolicy that keeps past commits around. This is useful
when building transactional support on top of Lucene. (Mike
McCandless)
2. LUCENE-1382: Add an optional arbitrary String "commitUserData" to
IndexWriter.commit(), which is stored in the segments file and is
then retrievable via IndexReader.getCommitUserData instance and
static methods. (Shalin Shekhar Mangar via Mike McCandless)
Optimizations

View File

@ -169,6 +169,11 @@ abstract class DirectoryIndexReader extends IndexReader {
return segmentInfos.getVersion();
}
public String getCommitUserData() {
ensureOpen();
return segmentInfos.getUserData();
}
/**
* Check whether this IndexReader is still using the
* current (i.e., most recently committed) version of the
@ -367,11 +372,13 @@ abstract class DirectoryIndexReader extends IndexReader {
long generation;
long version;
final boolean isOptimized;
final String userData;
ReaderCommit(SegmentInfos infos, Directory dir) throws IOException {
segmentsFileName = infos.getCurrentSegmentFileName();
this.dir = dir;
final int size = infos.size();
userData = infos.getUserData();
files = new ArrayList(size);
files.add(segmentsFileName);
for(int i=0;i<size;i++) {
@ -405,6 +412,9 @@ abstract class DirectoryIndexReader extends IndexReader {
public boolean isDeleted() {
return false;
}
public String getUserData() {
return userData;
}
}
/**

View File

@ -101,14 +101,14 @@ public abstract class IndexCommit implements IndexCommitPoint {
}
/** Returns the version for this IndexCommit. This is the
same value that {@link IndexReader#getVersion} would
return if it were opened on this commit. */
* same value that {@link IndexReader#getVersion} would
* return if it were opened on this commit. */
public long getVersion() {
throw new UnsupportedOperationException("This IndexCommit does not support this method.");
}
/** Returns the generation (the _N in segments_N) for this
IndexCommit */
* IndexCommit */
public long getGeneration() {
throw new UnsupportedOperationException("This IndexCommit does not support this method.");
}
@ -120,4 +120,10 @@ public abstract class IndexCommit implements IndexCommitPoint {
public long getTimestamp() throws IOException {
return getDirectory().fileModified(getSegmentsFileName());
}
/** Returns userData, previously passed to {@link
* IndexWriter#commit(String)} for this commit. */
public String getUserData() throws IOException {
throw new UnsupportedOperationException("This IndexCommit does not support this method.");
}
}

View File

@ -585,10 +585,12 @@ final class IndexFileDeleter {
long version;
long generation;
final boolean isOptimized;
final String userData;
public CommitPoint(Collection commitsToDelete, Directory directory, SegmentInfos segmentInfos) throws IOException {
this.directory = directory;
this.commitsToDelete = commitsToDelete;
userData = segmentInfos.getUserData();
segmentsFileName = segmentInfos.getCurrentSegmentFileName();
version = segmentInfos.getVersion();
generation = segmentInfos.getGeneration();
@ -629,6 +631,10 @@ final class IndexFileDeleter {
return generation;
}
public String getUserData() {
return userData;
}
/**
* Called only be the deletion policy, to remove this
* commit point from the index.

View File

@ -457,6 +457,24 @@ public abstract class IndexReader {
return SegmentInfos.readCurrentVersion(directory);
}
/**
* Reads commitUserData, previously passed to {@link
* IndexWriter#commit(String)}, from current index
* segments file. This will return null if {@link
* IndexWriter#commit(String)} has never been called for
* this index.
*
* @param directory where the index resides.
* @return commit userData.
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*
* @see #getCommitUserData()
*/
public static String getCommitUserData(Directory directory) throws CorruptIndexException, IOException {
return SegmentInfos.readCurrentUserData(directory);
}
/**
* Version number when this IndexReader was opened. Not implemented in the IndexReader base class.
* @throws UnsupportedOperationException unless overridden in subclass
@ -465,6 +483,18 @@ public abstract class IndexReader {
throw new UnsupportedOperationException("This reader does not support this method.");
}
/**
* Retrieve the String userData optionally passed to
* IndexWriter#commit. This will return null if {@link
* IndexWriter#commit(String)} has never been called for
* this index.
*
* @see #getCommitUserData(Directory)
*/
public String getCommitUserData() {
throw new UnsupportedOperationException("This reader does not support this method.");
}
/**<p>For IndexReader implementations that use
* TermInfosReader to read terms, this sets the
* indexDivisor to subsample the number of indexed terms

View File

@ -3400,8 +3400,16 @@ public class IndexWriter {
flush(true, false, true);
}
/** <p>Expert: prepare for commit. This does the first
* phase of 2-phase commit. You can only call this when
/** Expert: prepare for commit.
* @see #prepareCommit(String) */
public final void prepareCommit() throws CorruptIndexException, IOException {
ensureOpen();
prepareCommit(null);
}
/** <p>Expert: prepare for commit, specifying
* commitUserData String. This does the first phase of
* 2-phase commit. You can only call this when
* autoCommit is false. This method does all steps
* necessary to commit changes since this writer was
* opened: flushes pending added and deleted docs, syncs
@ -3410,17 +3418,28 @@ public class IndexWriter {
* #commit()} to finish the commit, or {@link
* #rollback()} to revert the commit and undo all changes
* done since the writer was opened.</p>
*
* You can also just call {@link #commit(String)} directly
* without prepareCommit first in which case that method
* will internally call prepareCommit.
*
* You can also just call {@link #commit()} directly
* without prepareCommit first in which case that method
* will internally call prepareCommit.
* @param commitUserData Opaque String that's recorded
* into the segments file in the index, and retrievable
* by {@link IndexReader#getCommitUserData}. Note that
* when IndexWriter commits itself, for example if open
* with autoCommit=true, or, during {@link #close}, the
* commitUserData is unchanged (just carried over from
* the prior commit). If this is null then the previous
* commitUserData is kept. Also, the commitUserData will
* only "stick" if there are actually changes in the
* index to commit. Therefore it's best to use this
* feature only when autoCommit is false.
*/
public final void prepareCommit() throws CorruptIndexException, IOException {
ensureOpen();
prepareCommit(false);
public final void prepareCommit(String commitUserData) throws CorruptIndexException, IOException {
prepareCommit(commitUserData, false);
}
private final void prepareCommit(boolean internal) throws CorruptIndexException, IOException {
private final void prepareCommit(String commitUserData, boolean internal) throws CorruptIndexException, IOException {
if (hitOOM)
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
@ -3435,11 +3454,11 @@ public class IndexWriter {
flush(true, true, true);
startCommit(0);
startCommit(0, commitUserData);
}
private void commit(long sizeInBytes) throws IOException {
startCommit(sizeInBytes);
startCommit(sizeInBytes, null);
finishCommit();
}
@ -3482,7 +3501,17 @@ public class IndexWriter {
* @see #prepareCommit
*/
/** Commits all changes to the index.
* @see #commit(String) */
public final void commit() throws CorruptIndexException, IOException {
commit(null);
}
/** Commits all changes to the index, specifying a
* commitUserData String. This just calls {@link
* #prepareCommit(String)} (if you didn't already call
* it) and then {@link #finishCommit}. */
public final void commit(String commitUserData) throws CorruptIndexException, IOException {
ensureOpen();
@ -3494,7 +3523,7 @@ public class IndexWriter {
if (autoCommit || pendingCommit == null) {
message("commit: now prepare");
prepareCommit(true);
prepareCommit(commitUserData, true);
} else
message("commit: already prepared");
@ -3513,6 +3542,7 @@ public class IndexWriter {
message("commit: wrote segments file \"" + pendingCommit.getCurrentSegmentFileName() + "\"");
lastCommitChangeCount = pendingCommitChangeCount;
segmentInfos.updateGeneration(pendingCommit);
segmentInfos.setUserData(pendingCommit.getUserData());
setRollbackSegmentInfos(pendingCommit);
deleter.checkpoint(pendingCommit, true);
} finally {
@ -4600,7 +4630,7 @@ public class IndexWriter {
* if it wasn't already. If that succeeds, then we
* prepare a new segments_N file but do not fully commit
* it. */
private void startCommit(long sizeInBytes) throws IOException {
private void startCommit(long sizeInBytes, String commitUserData) throws IOException {
assert testPoint("startStartCommit");
@ -4655,6 +4685,10 @@ public class IndexWriter {
message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount);
toSync = (SegmentInfos) segmentInfos.clone();
if (commitUserData != null)
toSync.setUserData(commitUserData);
deleter.incRef(toSync, false);
myChangeCount = changeCount;
} finally {

View File

@ -69,8 +69,11 @@ final class SegmentInfos extends Vector {
* omitTf==false) */
public static final int FORMAT_HAS_PROX = -7;
/** This format adds optional commit userData (String) storage. */
public static final int FORMAT_USER_DATA = -8;
/* This must always point to the most recent file format. */
static final int CURRENT_FORMAT = FORMAT_HAS_PROX;
static final int CURRENT_FORMAT = FORMAT_USER_DATA;
public int counter = 0; // used to name new segments
/**
@ -84,6 +87,8 @@ final class SegmentInfos extends Vector {
// or wrote; this is normally the same as generation except if
// there was an IOException that had interrupted a commit
private String userData; // Opaque String that user can specify during IndexWriter.commit
/**
* If non-null, information about loading segments_N files
* will be printed here. @see #setInfoStream.
@ -241,6 +246,13 @@ final class SegmentInfos extends Vector {
version = input.readLong(); // read version
}
if (format <= FORMAT_USER_DATA) {
if (0 == input.readByte())
userData = null;
else
userData = input.readString();
}
if (format <= FORMAT_CHECKSUM) {
final long checksumNow = input.getChecksum();
final long checksumThen = input.readLong();
@ -306,6 +318,12 @@ final class SegmentInfos extends Vector {
for (int i = 0; i < size(); i++) {
info(i).write(output);
}
if (userData == null)
output.writeByte((byte) 0);
else {
output.writeByte((byte) 1);
output.writeString(userData);
}
output.prepareCommit();
success = true;
pendingOutput = output;
@ -394,6 +412,18 @@ final class SegmentInfos extends Vector {
}.run()).longValue();
}
/**
* Returns userData from latest segments file
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public static String readCurrentUserData(Directory directory)
throws CorruptIndexException, IOException {
SegmentInfos sis = new SegmentInfos();
sis.read(directory);
return sis.getUserData();
}
/** If non-null, information about retries when loading
* the segments file will be printed to this.
*/
@ -841,6 +871,14 @@ final class SegmentInfos extends Vector {
return buffer.toString();
}
public String getUserData() {
return userData;
}
public void setUserData(String data) {
userData = data;
}
/** Replaces all segments in this instance, but keeps
* generation, version, counter so that future commits
* remain write once.

View File

@ -118,6 +118,9 @@ public class SnapshotDeletionPolicy implements IndexDeletionPolicy {
public long getGeneration() {
return cp.getGeneration();
}
public String getUserData() throws IOException {
return cp.getUserData();
}
}
private List wrapCommits(List commits) {

View File

@ -830,6 +830,12 @@
NormGen<sup>NumField</sup>,
IsCompoundFile, DeletionCount, HasProx&gt;<sup>SegCount</sup>, Checksum
</p>
<p>
<b>2.9 and above:</b>
Segments --&gt; Format, Version, NameCounter, SegCount, &lt;SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
NormGen<sup>NumField</sup>,
IsCompoundFile, DeletionCount, HasProx&gt;<sup>SegCount</sup>, HasUserData, CommitUserData?, Checksum
</p>
<p>
Format, NameCounter, SegCount, SegSize, NumField,
@ -841,16 +847,16 @@
</p>
<p>
SegName, DocStoreSegment --&gt; String
SegName, DocStoreSegment, CommitUserData --&gt; String
</p>
<p>
IsCompoundFile, HasSingleNormFile,
DocStoreIsCompoundFile, HasProx --&gt; Int8
DocStoreIsCompoundFile, HasProx, HasUserData --&gt; Int8
</p>
<p>
Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3 and -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4.
Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3, -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4, and -8 (SegmentInfos.FORMAT_USER_DATA) as of Lucene 2.9.
</p>
<p>
@ -951,6 +957,13 @@
omitTf set to false; else, it's 0.
</p>
<p>
If HasUserData is 1, then the string
CommitUserData is non-null and is stored. This is
a string previously passed to IndexWriter's commit
or prepareCommit method.
</p>
</section>
<section id="Lock File"><title>Lock File</title>

View File

@ -4116,6 +4116,44 @@ public class TestIndexWriter extends LuceneTestCase
}
}
// LUCENE-1382
public void testCommitUserData() throws IOException {
Directory dir = new MockRAMDirectory();
IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
w.setMaxBufferedDocs(2);
for(int j=0;j<17;j++)
addDoc(w);
w.close();
assertEquals(null, IndexReader.getCommitUserData(dir));
IndexReader r = IndexReader.open(dir);
// commit(String) never called for this index
assertEquals(null, r.getCommitUserData());
r.close();
w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
w.setMaxBufferedDocs(2);
for(int j=0;j<17;j++)
addDoc(w);
w.commit("test1");
w.close();
assertEquals("test1", IndexReader.getCommitUserData(dir));
r = IndexReader.open(dir);
assertEquals("test1", r.getCommitUserData());
r.close();
w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
w.optimize();
w.close();
assertEquals("test1", IndexReader.getCommitUserData(dir));
dir.close();
}
public void testOptimizeExceptions() throws IOException {
RAMDirectory startDir = new MockRAMDirectory();
IndexWriter w = new IndexWriter(startDir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);