LUCENE-1382: add opaque userData String passed to IndexWriter.commit, recorded into the segments file

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@706240 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2008-10-20 11:33:13 +00:00
parent 0250cccacf
commit 0b90b5e23d
10 changed files with 211 additions and 27 deletions

View File

@ -20,13 +20,19 @@ Bug fixes
New features New features
1. LUCENE-1411: Added expert API to open an IndexWriter on a prior 1. LUCENE-1411: Added expert API to open an IndexWriter on a prior
commit, obtained from IndexReader.listCommits. This makes it commit, obtained from IndexReader.listCommits. This makes it
possible to rollback changes to an index even after you've closed possible to rollback changes to an index even after you've closed
the IndexWriter that made the changes, assuming you are using an the IndexWriter that made the changes, assuming you are using an
IndexDeletionPolicy that keeps past commits around. This is useful IndexDeletionPolicy that keeps past commits around. This is useful
when building transactional support on top of Lucene. (Mike when building transactional support on top of Lucene. (Mike
McCandless) McCandless)
2. LUCENE-1382: Add an optional arbitrary String "commitUserData" to
IndexWriter.commit(), which is stored in the segments file and is
then retrievable via IndexReader.getCommitUserData instance and
static methods. (Shalin Shekhar Mangar via Mike McCandless)
Optimizations Optimizations

View File

@ -169,6 +169,11 @@ abstract class DirectoryIndexReader extends IndexReader {
return segmentInfos.getVersion(); return segmentInfos.getVersion();
} }
public String getCommitUserData() {
ensureOpen();
return segmentInfos.getUserData();
}
/** /**
* Check whether this IndexReader is still using the * Check whether this IndexReader is still using the
* current (i.e., most recently committed) version of the * current (i.e., most recently committed) version of the
@ -367,11 +372,13 @@ abstract class DirectoryIndexReader extends IndexReader {
long generation; long generation;
long version; long version;
final boolean isOptimized; final boolean isOptimized;
final String userData;
ReaderCommit(SegmentInfos infos, Directory dir) throws IOException { ReaderCommit(SegmentInfos infos, Directory dir) throws IOException {
segmentsFileName = infos.getCurrentSegmentFileName(); segmentsFileName = infos.getCurrentSegmentFileName();
this.dir = dir; this.dir = dir;
final int size = infos.size(); final int size = infos.size();
userData = infos.getUserData();
files = new ArrayList(size); files = new ArrayList(size);
files.add(segmentsFileName); files.add(segmentsFileName);
for(int i=0;i<size;i++) { for(int i=0;i<size;i++) {
@ -405,6 +412,9 @@ abstract class DirectoryIndexReader extends IndexReader {
public boolean isDeleted() { public boolean isDeleted() {
return false; return false;
} }
public String getUserData() {
return userData;
}
} }
/** /**

View File

@ -101,14 +101,14 @@ public abstract class IndexCommit implements IndexCommitPoint {
} }
/** Returns the version for this IndexCommit. This is the /** Returns the version for this IndexCommit. This is the
same value that {@link IndexReader#getVersion} would * same value that {@link IndexReader#getVersion} would
return if it were opened on this commit. */ * return if it were opened on this commit. */
public long getVersion() { public long getVersion() {
throw new UnsupportedOperationException("This IndexCommit does not support this method."); throw new UnsupportedOperationException("This IndexCommit does not support this method.");
} }
/** Returns the generation (the _N in segments_N) for this /** Returns the generation (the _N in segments_N) for this
IndexCommit */ * IndexCommit */
public long getGeneration() { public long getGeneration() {
throw new UnsupportedOperationException("This IndexCommit does not support this method."); throw new UnsupportedOperationException("This IndexCommit does not support this method.");
} }
@ -120,4 +120,10 @@ public abstract class IndexCommit implements IndexCommitPoint {
public long getTimestamp() throws IOException { public long getTimestamp() throws IOException {
return getDirectory().fileModified(getSegmentsFileName()); return getDirectory().fileModified(getSegmentsFileName());
} }
/** Returns userData, previously passed to {@link
* IndexWriter#commit(String)} for this commit. */
public String getUserData() throws IOException {
throw new UnsupportedOperationException("This IndexCommit does not support this method.");
}
} }

View File

@ -585,10 +585,12 @@ final class IndexFileDeleter {
long version; long version;
long generation; long generation;
final boolean isOptimized; final boolean isOptimized;
final String userData;
public CommitPoint(Collection commitsToDelete, Directory directory, SegmentInfos segmentInfos) throws IOException { public CommitPoint(Collection commitsToDelete, Directory directory, SegmentInfos segmentInfos) throws IOException {
this.directory = directory; this.directory = directory;
this.commitsToDelete = commitsToDelete; this.commitsToDelete = commitsToDelete;
userData = segmentInfos.getUserData();
segmentsFileName = segmentInfos.getCurrentSegmentFileName(); segmentsFileName = segmentInfos.getCurrentSegmentFileName();
version = segmentInfos.getVersion(); version = segmentInfos.getVersion();
generation = segmentInfos.getGeneration(); generation = segmentInfos.getGeneration();
@ -629,6 +631,10 @@ final class IndexFileDeleter {
return generation; return generation;
} }
public String getUserData() {
return userData;
}
/** /**
* Called only be the deletion policy, to remove this * Called only be the deletion policy, to remove this
* commit point from the index. * commit point from the index.

View File

@ -457,6 +457,24 @@ public abstract class IndexReader {
return SegmentInfos.readCurrentVersion(directory); return SegmentInfos.readCurrentVersion(directory);
} }
/**
* Reads commitUserData, previously passed to {@link
* IndexWriter#commit(String)}, from current index
* segments file. This will return null if {@link
* IndexWriter#commit(String)} has never been called for
* this index.
*
* @param directory where the index resides.
* @return commit userData.
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*
* @see #getCommitUserData()
*/
public static String getCommitUserData(Directory directory) throws CorruptIndexException, IOException {
return SegmentInfos.readCurrentUserData(directory);
}
/** /**
* Version number when this IndexReader was opened. Not implemented in the IndexReader base class. * Version number when this IndexReader was opened. Not implemented in the IndexReader base class.
* @throws UnsupportedOperationException unless overridden in subclass * @throws UnsupportedOperationException unless overridden in subclass
@ -465,6 +483,18 @@ public abstract class IndexReader {
throw new UnsupportedOperationException("This reader does not support this method."); throw new UnsupportedOperationException("This reader does not support this method.");
} }
/**
* Retrieve the String userData optionally passed to
* IndexWriter#commit. This will return null if {@link
* IndexWriter#commit(String)} has never been called for
* this index.
*
* @see #getCommitUserData(Directory)
*/
public String getCommitUserData() {
throw new UnsupportedOperationException("This reader does not support this method.");
}
/**<p>For IndexReader implementations that use /**<p>For IndexReader implementations that use
* TermInfosReader to read terms, this sets the * TermInfosReader to read terms, this sets the
* indexDivisor to subsample the number of indexed terms * indexDivisor to subsample the number of indexed terms

View File

@ -3400,8 +3400,16 @@ public class IndexWriter {
flush(true, false, true); flush(true, false, true);
} }
/** <p>Expert: prepare for commit. This does the first /** Expert: prepare for commit.
* phase of 2-phase commit. You can only call this when * @see #prepareCommit(String) */
public final void prepareCommit() throws CorruptIndexException, IOException {
ensureOpen();
prepareCommit(null);
}
/** <p>Expert: prepare for commit, specifying
* commitUserData String. This does the first phase of
* 2-phase commit. You can only call this when
* autoCommit is false. This method does all steps * autoCommit is false. This method does all steps
* necessary to commit changes since this writer was * necessary to commit changes since this writer was
* opened: flushes pending added and deleted docs, syncs * opened: flushes pending added and deleted docs, syncs
@ -3410,17 +3418,28 @@ public class IndexWriter {
* #commit()} to finish the commit, or {@link * #commit()} to finish the commit, or {@link
* #rollback()} to revert the commit and undo all changes * #rollback()} to revert the commit and undo all changes
* done since the writer was opened.</p> * done since the writer was opened.</p>
*
* You can also just call {@link #commit(String)} directly
* without prepareCommit first in which case that method
* will internally call prepareCommit.
* *
* You can also just call {@link #commit()} directly * @param commitUserData Opaque String that's recorded
* without prepareCommit first in which case that method * into the segments file in the index, and retrievable
* will internally call prepareCommit. * by {@link IndexReader#getCommitUserData}. Note that
* when IndexWriter commits itself, for example if open
* with autoCommit=true, or, during {@link #close}, the
* commitUserData is unchanged (just carried over from
* the prior commit). If this is null then the previous
* commitUserData is kept. Also, the commitUserData will
* only "stick" if there are actually changes in the
* index to commit. Therefore it's best to use this
* feature only when autoCommit is false.
*/ */
public final void prepareCommit() throws CorruptIndexException, IOException { public final void prepareCommit(String commitUserData) throws CorruptIndexException, IOException {
ensureOpen(); prepareCommit(commitUserData, false);
prepareCommit(false);
} }
private final void prepareCommit(boolean internal) throws CorruptIndexException, IOException { private final void prepareCommit(String commitUserData, boolean internal) throws CorruptIndexException, IOException {
if (hitOOM) if (hitOOM)
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit"); throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
@ -3435,11 +3454,11 @@ public class IndexWriter {
flush(true, true, true); flush(true, true, true);
startCommit(0); startCommit(0, commitUserData);
} }
private void commit(long sizeInBytes) throws IOException { private void commit(long sizeInBytes) throws IOException {
startCommit(sizeInBytes); startCommit(sizeInBytes, null);
finishCommit(); finishCommit();
} }
@ -3482,7 +3501,17 @@ public class IndexWriter {
* @see #prepareCommit * @see #prepareCommit
*/ */
/** Commits all changes to the index.
* @see #commit(String) */
public final void commit() throws CorruptIndexException, IOException { public final void commit() throws CorruptIndexException, IOException {
commit(null);
}
/** Commits all changes to the index, specifying a
* commitUserData String. This just calls {@link
* #prepareCommit(String)} (if you didn't already call
* it) and then {@link #finishCommit}. */
public final void commit(String commitUserData) throws CorruptIndexException, IOException {
ensureOpen(); ensureOpen();
@ -3494,7 +3523,7 @@ public class IndexWriter {
if (autoCommit || pendingCommit == null) { if (autoCommit || pendingCommit == null) {
message("commit: now prepare"); message("commit: now prepare");
prepareCommit(true); prepareCommit(commitUserData, true);
} else } else
message("commit: already prepared"); message("commit: already prepared");
@ -3513,6 +3542,7 @@ public class IndexWriter {
message("commit: wrote segments file \"" + pendingCommit.getCurrentSegmentFileName() + "\""); message("commit: wrote segments file \"" + pendingCommit.getCurrentSegmentFileName() + "\"");
lastCommitChangeCount = pendingCommitChangeCount; lastCommitChangeCount = pendingCommitChangeCount;
segmentInfos.updateGeneration(pendingCommit); segmentInfos.updateGeneration(pendingCommit);
segmentInfos.setUserData(pendingCommit.getUserData());
setRollbackSegmentInfos(pendingCommit); setRollbackSegmentInfos(pendingCommit);
deleter.checkpoint(pendingCommit, true); deleter.checkpoint(pendingCommit, true);
} finally { } finally {
@ -4600,7 +4630,7 @@ public class IndexWriter {
* if it wasn't already. If that succeeds, then we * if it wasn't already. If that succeeds, then we
* prepare a new segments_N file but do not fully commit * prepare a new segments_N file but do not fully commit
* it. */ * it. */
private void startCommit(long sizeInBytes) throws IOException { private void startCommit(long sizeInBytes, String commitUserData) throws IOException {
assert testPoint("startStartCommit"); assert testPoint("startStartCommit");
@ -4655,6 +4685,10 @@ public class IndexWriter {
message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount); message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount);
toSync = (SegmentInfos) segmentInfos.clone(); toSync = (SegmentInfos) segmentInfos.clone();
if (commitUserData != null)
toSync.setUserData(commitUserData);
deleter.incRef(toSync, false); deleter.incRef(toSync, false);
myChangeCount = changeCount; myChangeCount = changeCount;
} finally { } finally {

View File

@ -69,8 +69,11 @@ final class SegmentInfos extends Vector {
* omitTf==false) */ * omitTf==false) */
public static final int FORMAT_HAS_PROX = -7; public static final int FORMAT_HAS_PROX = -7;
/** This format adds optional commit userData (String) storage. */
public static final int FORMAT_USER_DATA = -8;
/* This must always point to the most recent file format. */ /* This must always point to the most recent file format. */
static final int CURRENT_FORMAT = FORMAT_HAS_PROX; static final int CURRENT_FORMAT = FORMAT_USER_DATA;
public int counter = 0; // used to name new segments public int counter = 0; // used to name new segments
/** /**
@ -84,6 +87,8 @@ final class SegmentInfos extends Vector {
// or wrote; this is normally the same as generation except if // or wrote; this is normally the same as generation except if
// there was an IOException that had interrupted a commit // there was an IOException that had interrupted a commit
private String userData; // Opaque String that user can specify during IndexWriter.commit
/** /**
* If non-null, information about loading segments_N files * If non-null, information about loading segments_N files
* will be printed here. @see #setInfoStream. * will be printed here. @see #setInfoStream.
@ -241,6 +246,13 @@ final class SegmentInfos extends Vector {
version = input.readLong(); // read version version = input.readLong(); // read version
} }
if (format <= FORMAT_USER_DATA) {
if (0 == input.readByte())
userData = null;
else
userData = input.readString();
}
if (format <= FORMAT_CHECKSUM) { if (format <= FORMAT_CHECKSUM) {
final long checksumNow = input.getChecksum(); final long checksumNow = input.getChecksum();
final long checksumThen = input.readLong(); final long checksumThen = input.readLong();
@ -306,6 +318,12 @@ final class SegmentInfos extends Vector {
for (int i = 0; i < size(); i++) { for (int i = 0; i < size(); i++) {
info(i).write(output); info(i).write(output);
} }
if (userData == null)
output.writeByte((byte) 0);
else {
output.writeByte((byte) 1);
output.writeString(userData);
}
output.prepareCommit(); output.prepareCommit();
success = true; success = true;
pendingOutput = output; pendingOutput = output;
@ -394,6 +412,18 @@ final class SegmentInfos extends Vector {
}.run()).longValue(); }.run()).longValue();
} }
/**
* Returns userData from latest segments file
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public static String readCurrentUserData(Directory directory)
throws CorruptIndexException, IOException {
SegmentInfos sis = new SegmentInfos();
sis.read(directory);
return sis.getUserData();
}
/** If non-null, information about retries when loading /** If non-null, information about retries when loading
* the segments file will be printed to this. * the segments file will be printed to this.
*/ */
@ -841,6 +871,14 @@ final class SegmentInfos extends Vector {
return buffer.toString(); return buffer.toString();
} }
public String getUserData() {
return userData;
}
public void setUserData(String data) {
userData = data;
}
/** Replaces all segments in this instance, but keeps /** Replaces all segments in this instance, but keeps
* generation, version, counter so that future commits * generation, version, counter so that future commits
* remain write once. * remain write once.

View File

@ -118,6 +118,9 @@ public class SnapshotDeletionPolicy implements IndexDeletionPolicy {
public long getGeneration() { public long getGeneration() {
return cp.getGeneration(); return cp.getGeneration();
} }
public String getUserData() throws IOException {
return cp.getUserData();
}
} }
private List wrapCommits(List commits) { private List wrapCommits(List commits) {

View File

@ -830,6 +830,12 @@
NormGen<sup>NumField</sup>, NormGen<sup>NumField</sup>,
IsCompoundFile, DeletionCount, HasProx&gt;<sup>SegCount</sup>, Checksum IsCompoundFile, DeletionCount, HasProx&gt;<sup>SegCount</sup>, Checksum
</p> </p>
<p>
<b>2.9 and above:</b>
Segments --&gt; Format, Version, NameCounter, SegCount, &lt;SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
NormGen<sup>NumField</sup>,
IsCompoundFile, DeletionCount, HasProx&gt;<sup>SegCount</sup>, HasUserData, CommitUserData?, Checksum
</p>
<p> <p>
Format, NameCounter, SegCount, SegSize, NumField, Format, NameCounter, SegCount, SegSize, NumField,
@ -841,16 +847,16 @@
</p> </p>
<p> <p>
SegName, DocStoreSegment --&gt; String SegName, DocStoreSegment, CommitUserData --&gt; String
</p> </p>
<p> <p>
IsCompoundFile, HasSingleNormFile, IsCompoundFile, HasSingleNormFile,
DocStoreIsCompoundFile, HasProx --&gt; Int8 DocStoreIsCompoundFile, HasProx, HasUserData --&gt; Int8
</p> </p>
<p> <p>
Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3 and -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4. Format is -1 as of Lucene 1.4, -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1 and 2.2, -4 (SegmentInfos.FORMAT_SHARED_DOC_STORE) as of Lucene 2.3, -7 (SegmentInfos.FORMAT_HAS_PROX) as of Lucene 2.4, and -8 (SegmentInfos.FORMAT_USER_DATA) as of Lucene 2.9.
</p> </p>
<p> <p>
@ -951,6 +957,13 @@
omitTf set to false; else, it's 0. omitTf set to false; else, it's 0.
</p> </p>
<p>
If HasUserData is 1, then the string
CommitUserData is non-null and is stored. This is
a string previously passed to IndexWriter's commit
or prepareCommit method.
</p>
</section> </section>
<section id="Lock File"><title>Lock File</title> <section id="Lock File"><title>Lock File</title>

View File

@ -4116,6 +4116,44 @@ public class TestIndexWriter extends LuceneTestCase
} }
} }
// LUCENE-1382
public void testCommitUserData() throws IOException {
Directory dir = new MockRAMDirectory();
IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
w.setMaxBufferedDocs(2);
for(int j=0;j<17;j++)
addDoc(w);
w.close();
assertEquals(null, IndexReader.getCommitUserData(dir));
IndexReader r = IndexReader.open(dir);
// commit(String) never called for this index
assertEquals(null, r.getCommitUserData());
r.close();
w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
w.setMaxBufferedDocs(2);
for(int j=0;j<17;j++)
addDoc(w);
w.commit("test1");
w.close();
assertEquals("test1", IndexReader.getCommitUserData(dir));
r = IndexReader.open(dir);
assertEquals("test1", r.getCommitUserData());
r.close();
w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
w.optimize();
w.close();
assertEquals("test1", IndexReader.getCommitUserData(dir));
dir.close();
}
public void testOptimizeExceptions() throws IOException { public void testOptimizeExceptions() throws IOException {
RAMDirectory startDir = new MockRAMDirectory(); RAMDirectory startDir = new MockRAMDirectory();
IndexWriter w = new IndexWriter(startDir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); IndexWriter w = new IndexWriter(startDir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);