From d43627afe13b1e3ecc6afd720a9c622ffce37831 Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Mon, 31 May 2010 14:37:11 +0000 Subject: [PATCH] LUCENE-2481: Enhance SnapshotDeletionPolicy to allow taking multiple snapshots (merge from 3x) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@949756 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 5 + .../PersistentSnapshotDeletionPolicy.java | 179 ++++++++ .../lucene/index/SnapshotDeletionPolicy.java | 394 ++++++++++++++---- .../apache/lucene/index/TestIndexWriter.java | 4 +- .../TestPersistentSnapshotDeletionPolicy.java | 138 ++++++ .../index/TestSnapshotDeletionPolicy.java | 345 +++++++++++---- 6 files changed, 892 insertions(+), 173 deletions(-) create mode 100644 lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java create mode 100644 lucene/src/test/org/apache/lucene/index/TestPersistentSnapshotDeletionPolicy.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 9e8f354aad7..d49ba4d7118 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -355,6 +355,11 @@ API Changes custom file names. You should use this method to name all your files. (Shai Erera) +* LUCENE-2481: SnapshotDeletionPolicy.snapshot() and release() were replaced + with equivalent ones that take a String (id) as argument. You can pass + whatever ID you want, as long as you use the same one when calling both. + (Shai Erera) + Bug fixes * LUCENE-2119: Don't throw NegativeArraySizeException if you pass diff --git a/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java b/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java new file mode 100644 index 00000000000..2d5498ad42b --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java @@ -0,0 +1,179 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import java.io.IOException; +import java.util.List; +import java.util.Map.Entry; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.util.Version; + +/** + * A {@link SnapshotDeletionPolicy} which adds a persistence layer so that + * snapshots can be maintained across the life of an application. The snapshots + * are persisted in a {@link Directory} and are committed as soon as + * {@link #snapshot(String)} or {@link #release(String)} is called. + *

+ * NOTE: this class receives a {@link Directory} to persist the data into + * a Lucene index. It is highly recommended to use a dedicated directory (and on + * stable storage as well) for persisting the snapshots' information, and not + * reuse the content index directory, or otherwise conflicts and index + * corruptions will occur. + *

+ * NOTE: you should call {@link #close()} when you're done using this + * class for safety (it will close the {@link IndexWriter} instance used). + */ +public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy { + + // Used to validate that the given directory includes just one document w/ the + // given ID field. Otherwise, it's not a valid Directory for snapshotting. + private static final String SNAPSHOTS_ID = "$SNAPSHOTS_DOC$"; + + // The index writer which maintains the snapshots metadata + private final IndexWriter writer; + + /** + * {@link PersistentSnapshotDeletionPolicy} wraps another + * {@link IndexDeletionPolicy} to enable flexible snapshotting. + * + * @param primary + * the {@link IndexDeletionPolicy} that is used on non-snapshotted + * commits. Snapshotted commits, by definition, are not deleted until + * explicitly released via {@link #release(String)}. + * @param dir + * the {@link Directory} which will be used to persist the snapshots + * information. + * @param mode + * specifies whether a new index should be created, deleting all + * existing snapshots information (immediately), or open an existing + * index, initializing the class with the snapshots information. + * @param matchVersion + * specifies the {@link Version} that should be used when opening the + * IndexWriter. + */ + public PersistentSnapshotDeletionPolicy(IndexDeletionPolicy primary, + Directory dir, OpenMode mode, Version matchVersion) + throws CorruptIndexException, LockObtainFailedException, IOException { + super(primary, null); + + // Initialize the index writer over the snapshot directory. + writer = new IndexWriter(dir, new IndexWriterConfig(matchVersion, null).setOpenMode(mode)); + if (mode != OpenMode.APPEND) { + // IndexWriter no longer creates a first commit on an empty Directory. So + // if we were asked to CREATE*, call commit() just to be sure. If the + // index contains information and mode is CREATE_OR_APPEND, it's a no-op. 
+ writer.commit(); + } + + // Initializes the snapshots information. This code should basically run + // only if mode != CREATE, but if it is, it's no harm as we only open the + // reader once and immediately close it. + IndexReader r = writer.getReader(); + try { + int numDocs = r.numDocs(); + // index is allowed to have exactly one document or 0. + if (numDocs == 1) { + Document doc = r.document(r.maxDoc() - 1); + Field sid = doc.getField(SNAPSHOTS_ID); + if (sid == null) { + throw new IllegalStateException("directory is not a valid snapshots store!"); + } + doc.removeField(SNAPSHOTS_ID); + for (Fieldable f : doc.getFields()) { + registerSnapshotInfo(f.name(), f.stringValue(), null); + } + } else if (numDocs != 0) { + throw new IllegalStateException( + "should be at most 1 document in the snapshots directory: " + numDocs); + } + } finally { + r.close(); + } + } + + @Override + public synchronized void onInit(List commits) + throws IOException { + // super.onInit() needs to be called first to ensure that initialization + // behaves as expected. The superclass, SnapshotDeletionPolicy, ensures + // that any snapshot IDs with empty IndexCommits are released. Since this + // happens, this class needs to persist these changes. + super.onInit(commits); + persistSnapshotInfos(null, null); + } + + /** + * Snapshots the last commit using the given ID. Once this method returns, the + * snapshot information is persisted in the directory. + * + * @see SnapshotDeletionPolicy#snapshot(String) + */ + @Override + public synchronized IndexCommit snapshot(String id) throws IOException { + checkSnapshotted(id); + if (SNAPSHOTS_ID.equals(id)) { + throw new IllegalArgumentException(id + " is reserved and cannot be used as a snapshot id"); + } + persistSnapshotInfos(id, lastCommit.getSegmentsFileName()); + return super.snapshot(id); + } + + /** + * Deletes a snapshotted commit by ID. Once this method returns, the snapshot + * information is committed to the directory. 
+ * + * @see SnapshotDeletionPolicy#release(String) + */ + @Override + public synchronized void release(String id) throws IOException { + super.release(id); + persistSnapshotInfos(null, null); + } + + /** Closes the index which writes the snapshots to the directory. */ + public void close() throws CorruptIndexException, IOException { + writer.close(); + } + + /** + * Persists all snapshots information. If the given id and segment are not + * null, it persists their information as well. + */ + private void persistSnapshotInfos(String id, String segment) throws IOException { + writer.deleteAll(); + Document d = new Document(); + d.add(new Field(SNAPSHOTS_ID, "", Store.YES, Index.NO)); + for (Entry e : super.getSnapshots().entrySet()) { + d.add(new Field(e.getKey(), e.getValue(), Store.YES, Index.NO)); + } + if (id != null) { + d.add(new Field(id, segment, Store.YES, Index.NO)); + } + writer.addDocument(d); + writer.commit(); + } + +} diff --git a/lucene/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java b/lucene/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java index 5c6528d2fb1..5df387b723a 100644 --- a/lucene/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java +++ b/lucene/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java @@ -18,131 +18,345 @@ package org.apache.lucene.index; */ import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.ArrayList; import java.util.Map; +import java.util.Set; +import java.util.Map.Entry; import java.io.IOException; + import org.apache.lucene.store.Directory; -/** A {@link IndexDeletionPolicy} that wraps around any other - * {@link IndexDeletionPolicy} and adds the ability to hold and - * later release a single "snapshot" of an index. While - * the snapshot is held, the {@link IndexWriter} will not - * remove any files associated with it even if the index is - * otherwise being actively, arbitrarily changed. 
Because - * we wrap another arbitrary {@link IndexDeletionPolicy}, this - * gives you the freedom to continue using whatever {@link - * IndexDeletionPolicy} you would normally want to use with your - * index. Note that you can re-use a single instance of - * SnapshotDeletionPolicy across multiple writers as long - * as they are against the same index Directory. Any - * snapshot held when a writer is closed will "survive" - * when the next writer is opened. - * - * @lucene.experimental */ - +/** + * An {@link IndexDeletionPolicy} that wraps around any other + * {@link IndexDeletionPolicy} and adds the ability to hold and later release + * snapshots of an index. While a snapshot is held, the {@link IndexWriter} will + * not remove any files associated with it even if the index is otherwise being + * actively, arbitrarily changed. Because we wrap another arbitrary + * {@link IndexDeletionPolicy}, this gives you the freedom to continue using + * whatever {@link IndexDeletionPolicy} you would normally want to use with your + * index. + * + *

+ * This class maintains all snapshots in-memory, and so the information is not + * persisted and not protected against system failures. If persistency is + * important, you can use {@link PersistentSnapshotDeletionPolicy} (or your own + * extension) and when creating a new instance of this deletion policy, pass the + * persistent snapshots information to + * {@link #SnapshotDeletionPolicy(IndexDeletionPolicy, Map)}. + * + * @lucene.experimental + */ public class SnapshotDeletionPolicy implements IndexDeletionPolicy { - private IndexCommit lastCommit; - private IndexDeletionPolicy primary; - private String snapshot; - - public SnapshotDeletionPolicy(IndexDeletionPolicy primary) { - this.primary = primary; - } - - public synchronized void onInit(List commits) throws IOException { - primary.onInit(wrapCommits(commits)); - lastCommit = commits.get(commits.size()-1); - } - - public synchronized void onCommit(List commits) throws IOException { - primary.onCommit(wrapCommits(commits)); - lastCommit = commits.get(commits.size()-1); - } - - /** Take a snapshot of the most recent commit to the - * index. You must call release() to free this snapshot. - * Note that while the snapshot is held, the files it - * references will not be deleted, which will consume - * additional disk space in your index. If you take a - * snapshot at a particularly bad time (say just before - * you call optimize()) then in the worst case this could - * consume an extra 1X of your total index size, until - * you release the snapshot. */ - public synchronized IndexCommit snapshot() { - if (lastCommit == null) { - throw new IllegalStateException("no index commits to snapshot !"); + /** Holds a Snapshot's information. 
*/ + private static class SnapshotInfo { + String id; + String segmentsFileName; + IndexCommit commit; + + public SnapshotInfo(String id, String segmentsFileName, IndexCommit commit) { + this.id = id; + this.segmentsFileName = segmentsFileName; + this.commit = commit; } - if (snapshot == null) - snapshot = lastCommit.getSegmentsFileName(); - else - throw new IllegalStateException("snapshot is already set; please call release() first"); - return lastCommit; + @Override + public String toString() { + return id + " : " + segmentsFileName; + } } + + protected class SnapshotCommitPoint extends IndexCommit { + protected IndexCommit cp; - /** Release the currently held snapshot. */ - public synchronized void release() { - if (snapshot != null) - snapshot = null; - else - throw new IllegalStateException("snapshot was not set; please call snapshot() first"); - } - - private class MyCommitPoint extends IndexCommit { - IndexCommit cp; - MyCommitPoint(IndexCommit cp) { + protected SnapshotCommitPoint(IndexCommit cp) { this.cp = cp; } - @Override - public String getSegmentsFileName() { - return cp.getSegmentsFileName(); + + /** + * Returns true if this segment can be deleted. The default implementation + * returns false if this segment is currently held as snapshot. + */ + protected boolean shouldDelete(String segmentsFileName) { + return !segmentsFileToIDs.containsKey(segmentsFileName); } + @Override - public Collection getFileNames() throws IOException { - return cp.getFileNames(); + public void delete() { + synchronized (SnapshotDeletionPolicy.this) { + // Suppress the delete request if this commit point is + // currently snapshotted. + if (shouldDelete(getSegmentsFileName())) { + cp.delete(); + } + } } + @Override public Directory getDirectory() { return cp.getDirectory(); } + @Override - public void delete() { - synchronized(SnapshotDeletionPolicy.this) { - // Suppress the delete request if this commit point is - // our current snapshot. 
- if (snapshot == null || !snapshot.equals(getSegmentsFileName())) - cp.delete(); - } - } - @Override - public boolean isDeleted() { - return cp.isDeleted(); - } - @Override - public long getVersion() { - return cp.getVersion(); + public Collection getFileNames() throws IOException { + return cp.getFileNames(); } + @Override public long getGeneration() { return cp.getGeneration(); } + @Override - public Map getUserData() throws IOException { + public String getSegmentsFileName() { + return cp.getSegmentsFileName(); + } + + @Override + public Map getUserData() throws IOException { return cp.getUserData(); } + + @Override + public long getVersion() { + return cp.getVersion(); + } + + @Override + public boolean isDeleted() { + return cp.isDeleted(); + } + @Override public boolean isOptimized() { return cp.isOptimized(); } } - private List wrapCommits(List commits) { - final int count = commits.size(); - List myCommits = new ArrayList(count); - for(int i=0;i idToSnapshot = new HashMap(); + + // multiple IDs could point to the same commit point (segments file name) + private Map> segmentsFileToIDs = new HashMap>(); + + private IndexDeletionPolicy primary; + protected IndexCommit lastCommit; + + public SnapshotDeletionPolicy(IndexDeletionPolicy primary) { + this.primary = primary; } + + /** + * {@link SnapshotDeletionPolicy} wraps another {@link IndexDeletionPolicy} to + * enable flexible snapshotting. + * + * @param primary + * the {@link IndexDeletionPolicy} that is used on non-snapshotted + * commits. Snapshotted commits, are not deleted until explicitly + * released via {@link #release(String)} + * @param snapshotsInfo + * A mapping of snapshot ID to the segments filename that is being + * snapshotted. The expected input would be the output of + * {@link #getSnapshots()}. A null value signals that there are no + * initial snapshots to maintain. 
+ */ + public SnapshotDeletionPolicy(IndexDeletionPolicy primary, + Map snapshotsInfo) { + this(primary); + + if (snapshotsInfo != null) { + // Add the ID->segmentIDs here - the actual IndexCommits will be + // reconciled on the call to onInit() + for (Entry e : snapshotsInfo.entrySet()) { + registerSnapshotInfo(e.getKey(), e.getValue(), null); + } + } + } + + /** + * Checks if the given id is already used by another snapshot, and throws + * {@link IllegalStateException} if it is. + */ + protected void checkSnapshotted(String id) { + if (isSnapshotted(id)) { + throw new IllegalStateException("Snapshot ID " + id + + " is already used - must be unique"); + } + } + + /** Registers the given snapshot information. */ + protected void registerSnapshotInfo(String id, String segment, IndexCommit commit) { + idToSnapshot.put(id, new SnapshotInfo(id, segment, commit)); + Set ids = segmentsFileToIDs.get(segment); + if (ids == null) { + ids = new HashSet(); + segmentsFileToIDs.put(segment, ids); + } + ids.add(id); + } + + protected List wrapCommits(List commits) { + List wrappedCommits = new ArrayList(commits.size()); + for (IndexCommit ic : commits) { + wrappedCommits.add(new SnapshotCommitPoint(ic)); + } + return wrappedCommits; + } + + /** + * Get a snapshotted IndexCommit by ID. The IndexCommit can then be used to + * open an IndexReader on a specific commit point, or rollback the index by + * opening an IndexWriter with the IndexCommit specified in its + * {@link IndexWriterConfig}. + * + * @param id + * a unique identifier of the commit that was snapshotted. + * @throws IllegalStateException + * if no snapshot exists by the specified ID. + * @return The {@link IndexCommit} for this particular snapshot. 
+ */ + public synchronized IndexCommit getSnapshot(String id) { + SnapshotInfo snapshotInfo = idToSnapshot.get(id); + if (snapshotInfo == null) { + throw new IllegalStateException("No snapshot exists by ID: " + id); + } + return snapshotInfo.commit; + } + + /** + * Get all the snapshots in a map of snapshot IDs to the segments they + * 'cover.' This can be passed to + * {@link #SnapshotDeletionPolicy(IndexDeletionPolicy, Map)} in order to + * initialize snapshots at construction. + */ + public synchronized Map getSnapshots() { + Map snapshots = new HashMap(); + for (Entry e : idToSnapshot.entrySet()) { + snapshots.put(e.getKey(), e.getValue().segmentsFileName); + } + return snapshots; + } + + /** + * Returns true if the given ID is already used by a snapshot. You can call + * this method before {@link #snapshot(String)} if you are not sure whether + * the ID is already used or not. + */ + public boolean isSnapshotted(String id) { + return idToSnapshot.containsKey(id); + } + + public synchronized void onCommit(List commits) + throws IOException { + primary.onCommit(wrapCommits(commits)); + lastCommit = commits.get(commits.size() - 1); + } + + public synchronized void onInit(List commits) + throws IOException { + primary.onInit(wrapCommits(commits)); + lastCommit = commits.get(commits.size() - 1); + + /* + * Assign snapshotted IndexCommits to their correct snapshot IDs as + * specified in the constructor. + */ + for (IndexCommit commit : commits) { + Set ids = segmentsFileToIDs.get(commit.getSegmentsFileName()); + if (ids != null) { + for (String id : ids) { + idToSnapshot.get(id).commit = commit; + } + } + } + + /* + * Second, see if there are any instances where a snapshot ID was specified + * in the constructor but an IndexCommit doesn't exist. In this case, the ID + * should be removed. + * + * Note: This code is protective for extreme cases where IDs point to + * non-existent segments. 
As the constructor should have received its + * information via a call to getSnapshots(), the data should be well-formed. + */ + // Find lost snapshots + ArrayList idsToRemove = null; + for (Entry e : idToSnapshot.entrySet()) { + if (e.getValue().commit == null) { + if (idsToRemove == null) { + idsToRemove = new ArrayList(); + } + idsToRemove.add(e.getKey()); + } + } + // Finally, remove those 'lost' snapshots. + if (idsToRemove != null) { + for (String id : idsToRemove) { + SnapshotInfo info = idToSnapshot.remove(id); + segmentsFileToIDs.remove(info.segmentsFileName); + } + } + } + + /** + * Release a snapshotted commit by ID. + * + * @param id + * a unique identifier of the commit that is un-snapshotted. + * @throws IllegalStateException + * if no snapshot exists by this ID. + */ + public synchronized void release(String id) throws IOException { + SnapshotInfo info = idToSnapshot.remove(id); + if (info == null) { + throw new IllegalStateException("Snapshot doesn't exist: " + id); + } + Set ids = segmentsFileToIDs.get(info.segmentsFileName); + if (ids != null) { + ids.remove(id); + if (ids.size() == 0) { + segmentsFileToIDs.remove(info.segmentsFileName); + } + } + } + + /** + * Snapshots the last commit. Once a commit is 'snapshotted,' it is protected + * from deletion (as long as this {@link IndexDeletionPolicy} is used). The + * commit can be removed by calling {@link #release(String)} using the same ID + * parameter followed by a call to {@link IndexWriter#deleteUnusedFiles()}. + *

+ * NOTE: ID must be unique in the system. If the same ID is used twice, + * an {@link IllegalStateException} is thrown. + *

+ * NOTE: while the snapshot is held, the files it references will not + * be deleted, which will consume additional disk space in your index. If you + * take a snapshot at a particularly bad time (say just before you call + * optimize()) then in the worst case this could consume an extra 1X of your + * total index size, until you release the snapshot. + * + * @param id + * a unique identifier of the commit that is being snapshotted. + * @throws IllegalStateException + * if either there is no 'last commit' to snapshot, or if the + * parameter 'ID' refers to an already snapshotted commit. + * @return the {@link IndexCommit} that was snapshotted. + */ + public synchronized IndexCommit snapshot(String id) throws IOException { + if (lastCommit == null) { + // no commit exists. Really shouldn't happen, but might be if SDP is + // accessed before onInit or onCommit were called. + throw new IllegalStateException("No index commit to snapshot"); + } + + // Can't use the same snapshot ID twice... + checkSnapshotted(id); + + registerSnapshotInfo(id, lastCommit.getSegmentsFileName(), lastCommit); + return lastCommit; + } + } diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java index bfc8464565b..c319cc7a26e 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -4838,7 +4838,7 @@ public class TestIndexWriter extends LuceneTestCase { assertEquals(1, IndexReader.listCommits(dir).size()); // Keep that commit - sdp.snapshot(); + sdp.snapshot("id"); // Second commit - now KeepOnlyLastCommit cannot delete the prev commit. 
doc = new Document(); @@ -4848,7 +4848,7 @@ public class TestIndexWriter extends LuceneTestCase { assertEquals(2, IndexReader.listCommits(dir).size()); // Should delete the unreferenced commit - sdp.release(); + sdp.release("id"); writer.deleteUnusedFiles(); assertEquals(1, IndexReader.listCommits(dir).size()); diff --git a/lucene/src/test/org/apache/lucene/index/TestPersistentSnapshotDeletionPolicy.java b/lucene/src/test/org/apache/lucene/index/TestPersistentSnapshotDeletionPolicy.java new file mode 100644 index 00000000000..16f02b683ed --- /dev/null +++ b/lucene/src/test/org/apache/lucene/index/TestPersistentSnapshotDeletionPolicy.java @@ -0,0 +1,138 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockRAMDirectory; +import org.junit.Test; + +public class TestPersistentSnapshotDeletionPolicy extends TestSnapshotDeletionPolicy { + + // Keep it a class member so that getDeletionPolicy can use it + private Directory snapshotDir = new MockRAMDirectory(); + + @Override + protected SnapshotDeletionPolicy getDeletionPolicy() throws IOException { + IndexWriter.unlock(snapshotDir); + return new PersistentSnapshotDeletionPolicy( + new KeepOnlyLastCommitDeletionPolicy(), snapshotDir, OpenMode.CREATE, + TEST_VERSION_CURRENT); + } + + @Override + protected SnapshotDeletionPolicy getDeletionPolicy(Map snapshots) throws IOException { + SnapshotDeletionPolicy sdp = getDeletionPolicy(); + if (snapshots != null) { + for (Entry e: snapshots.entrySet()) { + sdp.registerSnapshotInfo(e.getKey(), e.getValue(), null); + } + } + return sdp; + } + + @Override + @Test + public void testExistingSnapshots() throws Exception { + int numSnapshots = 3; + Directory dir = new MockRAMDirectory(); + PersistentSnapshotDeletionPolicy psdp = (PersistentSnapshotDeletionPolicy) getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(psdp)); + prepareIndexAndSnapshots(psdp, writer, numSnapshots, "snapshot"); + writer.close(); + psdp.close(); + + // Re-initialize and verify snapshots were persisted + psdp = new PersistentSnapshotDeletionPolicy( + new KeepOnlyLastCommitDeletionPolicy(), snapshotDir, OpenMode.APPEND, + TEST_VERSION_CURRENT); + new IndexWriter(dir, getConfig(psdp)).close(); + + assertSnapshotExists(dir, psdp, numSnapshots); + assertEquals(numSnapshots, psdp.getSnapshots().size()); + psdp.close(); + } + + @Test(expected=IllegalArgumentException.class) + public void 
testIllegalSnapshotId() throws Exception { + getDeletionPolicy().snapshot("$SNAPSHOTS_DOC$"); + } + + @Test + public void testInvalidSnapshotInfos() throws Exception { + // Add the correct number of documents (1), but without snapshot information + IndexWriter writer = new IndexWriter(snapshotDir, getConfig(null)); + writer.addDocument(new Document()); + writer.close(); + try { + new PersistentSnapshotDeletionPolicy( + new KeepOnlyLastCommitDeletionPolicy(), snapshotDir, OpenMode.APPEND, + TEST_VERSION_CURRENT); + fail("should not have succeeded to read from an invalid Directory"); + } catch (IllegalStateException e) { + } + } + + @Test + public void testNoSnapshotInfos() throws Exception { + // Initialize an empty index in snapshotDir - PSDP should initialize successfully. + new IndexWriter(snapshotDir, getConfig(null)).close(); + new PersistentSnapshotDeletionPolicy( + new KeepOnlyLastCommitDeletionPolicy(), snapshotDir, OpenMode.APPEND, + TEST_VERSION_CURRENT).close(); + } + + @Test(expected=IllegalStateException.class) + public void testTooManySnapshotInfos() throws Exception { + // Write two documents to the snapshots directory - illegal. 
+ IndexWriter writer = new IndexWriter(snapshotDir, getConfig(null)); + writer.addDocument(new Document()); + writer.addDocument(new Document()); + writer.close(); + + new PersistentSnapshotDeletionPolicy( + new KeepOnlyLastCommitDeletionPolicy(), snapshotDir, OpenMode.APPEND, + TEST_VERSION_CURRENT).close(); + fail("should not have succeeded to open an invalid directory"); + } + + @Test + public void testSnapshotRelease() throws Exception { + Directory dir = new MockRAMDirectory(); + PersistentSnapshotDeletionPolicy psdp = (PersistentSnapshotDeletionPolicy) getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(psdp)); + prepareIndexAndSnapshots(psdp, writer, 1, "snapshot"); + writer.close(); + + psdp.release("snapshot0"); + psdp.close(); + + psdp = new PersistentSnapshotDeletionPolicy( + new KeepOnlyLastCommitDeletionPolicy(), snapshotDir, OpenMode.APPEND, + TEST_VERSION_CURRENT); + assertEquals("Should have no snapshots !", 0, psdp.getSnapshots().size()); + } + +} diff --git a/lucene/src/test/org/apache/lucene/index/TestSnapshotDeletionPolicy.java b/lucene/src/test/org/apache/lucene/index/TestSnapshotDeletionPolicy.java index e13be52d876..42afad50fd3 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSnapshotDeletionPolicy.java +++ b/lucene/src/test/org/apache/lucene/index/TestSnapshotDeletionPolicy.java @@ -1,25 +1,9 @@ package org.apache.lucene.index; -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - import static org.junit.Assert.*; import java.util.Collection; +import java.util.Map; import java.io.File; import java.io.IOException; @@ -46,9 +30,58 @@ import org.junit.Test; // public class TestSnapshotDeletionPolicy extends LuceneTestCaseJ4 { - + public static final String INDEX_PATH = "test.snapshots"; + protected IndexWriterConfig getConfig(IndexDeletionPolicy dp) { + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + if (dp != null) { + conf.setIndexDeletionPolicy(dp); + } + return conf; + } + + protected void checkSnapshotExists(Directory dir, IndexCommit c) throws Exception { + String segFileName = c.getSegmentsFileName(); + assertTrue("segments file not found in directory: " + segFileName, dir.fileExists(segFileName)); + } + + protected void checkMaxDoc(IndexCommit commit, int expectedMaxDoc) throws Exception { + IndexReader reader = IndexReader.open(commit, true); + try { + assertEquals(expectedMaxDoc, reader.maxDoc()); + } finally { + reader.close(); + } + } + + protected void prepareIndexAndSnapshots(SnapshotDeletionPolicy sdp, + IndexWriter writer, int numSnapshots, String snapshotPrefix) + throws RuntimeException, IOException { + for (int i = 0; i < numSnapshots; i++) { + // create dummy document to trigger commit. 
+ writer.addDocument(new Document()); + writer.commit(); + sdp.snapshot(snapshotPrefix + i); + } + } + + protected SnapshotDeletionPolicy getDeletionPolicy() throws IOException { + return getDeletionPolicy(null); + } + + protected SnapshotDeletionPolicy getDeletionPolicy(Map snapshots) throws IOException { + return new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy(), snapshots); + } + + protected void assertSnapshotExists(Directory dir, SnapshotDeletionPolicy sdp, int numSnapshots) throws Exception { + for (int i = 0; i < numSnapshots; i++) { + IndexCommit snapshot = sdp.getSnapshot("snapshot" + i); + checkMaxDoc(snapshot, i + 1); + checkSnapshotExists(dir, snapshot); + } + } + @Test public void testSnapshotDeletionPolicy() throws Exception { File dir = _TestUtil.getTempDir(INDEX_PATH); @@ -65,61 +98,13 @@ public class TestSnapshotDeletionPolicy extends LuceneTestCaseJ4 { dir2.close(); } - @Test - public void testReuseAcrossWriters() throws Exception { - Directory dir = new MockRAMDirectory(); - - SnapshotDeletionPolicy dp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); - IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, - new MockAnalyzer()).setIndexDeletionPolicy(dp) - .setMaxBufferedDocs(2)); - Document doc = new Document(); - doc.add(new Field("content", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); - for(int i=0;i<7;i++) { - writer.addDocument(doc); - if (i % 2 == 0) { - writer.commit(); - } - } - IndexCommit cp = dp.snapshot(); - copyFiles(dir, cp); - writer.close(); - copyFiles(dir, cp); - - writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()).setIndexDeletionPolicy(dp)); - copyFiles(dir, cp); - for(int i=0;i<7;i++) { - writer.addDocument(doc); - if (i % 2 == 0) { - writer.commit(); - } - } - copyFiles(dir, cp); - writer.close(); - copyFiles(dir, cp); - dp.release(); - writer = new IndexWriter(dir, 
new IndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer()).setIndexDeletionPolicy(dp)); - writer.close(); - try { - copyFiles(dir, cp); - fail("did not hit expected IOException"); - } catch (IOException ioe) { - // expected - } - dir.close(); - } - private void runTest(Directory dir) throws Exception { // Run for ~1 seconds final long stopTime = System.currentTimeMillis() + 1000; - SnapshotDeletionPolicy dp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); + SnapshotDeletionPolicy dp = getDeletionPolicy(); final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, - new MockAnalyzer()).setIndexDeletionPolicy(dp) + TEST_VERSION_CURRENT, new MockAnalyzer()).setIndexDeletionPolicy(dp) .setMaxBufferedDocs(2)); writer.commit(); @@ -177,20 +162,21 @@ public class TestSnapshotDeletionPolicy extends LuceneTestCaseJ4 { TestIndexWriter.assertNoUnreferencedFiles(dir, "some files were not deleted but should have been"); } - /** Example showing how to use the SnapshotDeletionPolicy - * to take a backup. This method does not really do a - * backup; instead, it reads every byte of every file - * just to test that the files indeed exist and are - * readable even while the index is changing. */ + /** + * Example showing how to use the SnapshotDeletionPolicy to take a backup. + * This method does not really do a backup; instead, it reads every byte of + * every file just to test that the files indeed exist and are readable even + * while the index is changing. 
+ */ public void backupIndex(Directory dir, SnapshotDeletionPolicy dp) throws Exception { // To backup an index we first take a snapshot: try { - copyFiles(dir, dp.snapshot()); + copyFiles(dir, dp.snapshot("id")); } finally { // Make sure to release the snapshot, otherwise these // files will never be deleted during this IndexWriter // session: - dp.release(); + dp.release("id"); } } @@ -237,13 +223,210 @@ public class TestSnapshotDeletionPolicy extends LuceneTestCaseJ4 { input.close(); } } + - @Test(expected=IllegalStateException.class) - public void testNoCommits() throws Exception { - // Tests that if there were no commits when snapshot() is called, then - // IllegalStateException is thrown rather than NPE. - SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); - sdp.snapshot(); + @Test + public void testBasicSnapshots() throws Exception { + int numSnapshots = 3; + SnapshotDeletionPolicy sdp = getDeletionPolicy(); + + // Create 3 snapshots: snapshot0, snapshot1, snapshot2 + Directory dir = new MockRAMDirectory(); + IndexWriter writer = new IndexWriter(dir, getConfig(sdp)); + prepareIndexAndSnapshots(sdp, writer, numSnapshots, "snapshot"); + writer.close(); + + assertSnapshotExists(dir, sdp, numSnapshots); + + // open a reader on a snapshot - should succeed. + IndexReader.open(sdp.getSnapshot("snapshot0"), true).close(); + + // open a new IndexWriter w/ no snapshots to keep and assert that all snapshots are gone. 
+ sdp = getDeletionPolicy(); + writer = new IndexWriter(dir, getConfig(sdp)); + writer.deleteUnusedFiles(); + writer.close(); + assertEquals("no snapshots should exist", 1, IndexReader.listCommits(dir).size()); + + for (int i = 0; i < numSnapshots; i++) { + try { + sdp.getSnapshot("snapshot" + i); + fail("snapshot shouldn't have existed, but did: snapshot" + i); + } catch (IllegalStateException e) { + // expected - snapshot should not exist + } + } + } + + @Test + public void testMultiThreadedSnapshotting() throws Exception { + Directory dir = new MockRAMDirectory(); + final SnapshotDeletionPolicy sdp = getDeletionPolicy(); + final IndexWriter writer = new IndexWriter(dir, getConfig(sdp)); + + Thread[] threads = new Thread[10]; + for (int i = 0; i < threads.length; i++) { + threads[i] = new Thread() { + @Override + public void run() { + try { + writer.addDocument(new Document()); + writer.commit(); + sdp.snapshot(getName()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }; + threads[i].setName("t" + i); + } + + for (Thread t : threads) { + t.start(); + } + + for (Thread t : threads) { + t.join(); + } + + // Do one last commit, so that after we release all snapshots, we stay w/ one commit + writer.addDocument(new Document()); + writer.commit(); + + for (Thread t : threads) { + sdp.release(t.getName()); + writer.deleteUnusedFiles(); + } + assertEquals(1, IndexReader.listCommits(dir).size()); + writer.close(); + } + + @Test + public void testRollbackToOldSnapshot() throws Exception { + int numSnapshots = 2; + Directory dir = new MockRAMDirectory(); + SnapshotDeletionPolicy sdp = getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(sdp)); + prepareIndexAndSnapshots(sdp, writer, numSnapshots, "snapshot"); + writer.close(); + + // now open the writer on "snapshot0" - make sure it succeeds + writer = new IndexWriter(dir, getConfig(sdp).setIndexCommit(sdp.getSnapshot("snapshot0"))); + // this does the actual rollback + 
+ writer.commit(); + writer.deleteUnusedFiles(); + assertSnapshotExists(dir, sdp, numSnapshots - 1); + + // but 'snapshot1' files will still exist (need to release snapshot before they can be deleted). + String segFileName = sdp.getSnapshot("snapshot1").getSegmentsFileName(); + assertTrue("snapshot files should exist in the directory: " + segFileName, dir.fileExists(segFileName)); + } + + @Test + public void testReleaseSnapshot() throws Exception { + Directory dir = new MockRAMDirectory(); + SnapshotDeletionPolicy sdp = getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(sdp)); + prepareIndexAndSnapshots(sdp, writer, 1, "snapshot"); + + // Create another commit - we must do that, because otherwise the "snapshot" + // files will still remain in the index, since it's the last commit. + writer.addDocument(new Document()); + writer.commit(); + + // Release + String snapId = "snapshot0"; + String segFileName = sdp.getSnapshot(snapId).getSegmentsFileName(); + sdp.release(snapId); + try { + sdp.getSnapshot(snapId); + fail("should not have succeeded to get an unsnapshotted id"); + } catch (IllegalStateException e) { + // expected + } + assertNull(sdp.getSnapshots().get(snapId)); + writer.deleteUnusedFiles(); + assertFalse("segments file should not be found in directory: " + segFileName, dir.fileExists(segFileName)); + } + + @Test + public void testExistingSnapshots() throws Exception { + // Tests the ability to construct a SDP from existing snapshots, and + // asserts that those snapshots/commit points are protected. + int numSnapshots = 3; + Directory dir = new MockRAMDirectory(); + SnapshotDeletionPolicy sdp = getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(sdp)); + prepareIndexAndSnapshots(sdp, writer, numSnapshots, "snapshot"); + writer.close(); + + // Make a new policy and initialize with snapshots. 
+ sdp = getDeletionPolicy(sdp.getSnapshots()); + writer = new IndexWriter(dir, getConfig(sdp)); + // attempt to delete unused files - the snapshotted files should not be deleted + writer.deleteUnusedFiles(); + writer.close(); + assertSnapshotExists(dir, sdp, numSnapshots); + } + + @Test + public void testSnapshotLastCommitTwice() throws Exception { + Directory dir = new MockRAMDirectory(); + SnapshotDeletionPolicy sdp = getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(sdp)); + writer.addDocument(new Document()); + writer.commit(); + + String s1 = "s1"; + String s2 = "s2"; + IndexCommit ic1 = sdp.snapshot(s1); + IndexCommit ic2 = sdp.snapshot(s2); + assertTrue(ic1 == ic2); // should be the same instance + + // create another commit + writer.addDocument(new Document()); + writer.commit(); + + // release "s1" should not delete "s2" + sdp.release(s1); + writer.deleteUnusedFiles(); + checkSnapshotExists(dir, ic2); + + writer.close(); } + @Test + public void testMissingCommits() throws Exception { + // Tests the behavior of SDP when commits that are given at ctor are missing + // on onInit(). + Directory dir = new MockRAMDirectory(); + SnapshotDeletionPolicy sdp = getDeletionPolicy(); + IndexWriter writer = new IndexWriter(dir, getConfig(sdp)); + writer.addDocument(new Document()); + writer.commit(); + IndexCommit ic = sdp.snapshot("s1"); + + // create another commit, not snapshotted. + writer.addDocument(new Document()); + writer.close(); + + // open a new writer w/ KeepOnlyLastCommit policy, so it will delete "s1" + // commit. + new IndexWriter(dir, getConfig(null)).close(); + + assertFalse("snapshotted commit should not exist", dir.fileExists(ic.getSegmentsFileName())); + + // Now reinit SDP from the commits in the index - the snapshot id should not + // exist anymore. 
+ sdp = getDeletionPolicy(sdp.getSnapshots()); + new IndexWriter(dir, getConfig(sdp)).close(); + + try { + sdp.getSnapshot("s1"); + fail("snapshot s1 should not exist"); + } catch (IllegalStateException e) { + // expected. + } + } + }