From b04a15016d437f0b574c93c52c4c5f41021fc379 Mon Sep 17 00:00:00 2001
From: Doron Cohen Expert: represents a single commit into an index as seen by the
+ * {@link IndexDeletionPolicy}.
+ *
+ * Changes to the content of an index are made visible only
+ * after the writer who made that change has written to the
+ * directory a new segments file (
+ * Each index commit point has a unique segments file associated
+ * with it. The segments file associated with a later
+ * index commit point would have a larger N.
*/
public interface IndexCommitPoint {
/**
- * Get the segments file (ie,
+ * Upon calling this, the writer is notified that this commit
+ * point should be deleted.
+ *
+ * The decision that a commit point should be deleted is made by the {@link IndexDeletionPolicy} in effect
+ * and therefore this should only be called by its {@link IndexDeletionPolicy#onInit onInit()} or
+ * {@link IndexDeletionPolicy#onCommit onCommit()} methods.
*/
public void delete();
}
diff --git a/src/java/org/apache/lucene/index/IndexDeletionPolicy.java b/src/java/org/apache/lucene/index/IndexDeletionPolicy.java
index 76750845399..8d4daabcfc6 100644
--- a/src/java/org/apache/lucene/index/IndexDeletionPolicy.java
+++ b/src/java/org/apache/lucene/index/IndexDeletionPolicy.java
@@ -21,10 +21,13 @@ import java.util.List;
import java.io.IOException;
/**
- * Expert: implement this interface, and pass it to one
+ * Expert: policy for deletion of stale {@link IndexCommitPoint index commits}.
+ *
+ * Implement this interface, and pass it to one
* of the {@link IndexWriter} or {@link IndexReader}
- * constructors, to customize when "point in time" commits
- * are deleted from an index. The default deletion policy
+ * constructors, to customize when older
+ * {@link IndexCommitPoint point-in-time commits}
+ * are deleted from the index directory. The default deletion policy
* is {@link KeepOnlyLastCommitDeletionPolicy}, which always
* removes old commits as soon as a new commit is done (this
* matches the behavior before 2.2).segments_N
). This point in
+ * time, when the action of writing of a new segments file to the
+ * directory is completed, is therefore an index commit point.
+ * segments_N
) of
- * this commit point.
+ * Get the segments file (segments_N
) associated
+ * with this commit point.
*/
public String getSegmentsFileName();
/**
- * Notify the writer that this commit point should be
- * deleted. This should only be called by the {@link
- * IndexDeletionPolicy} during its {@link
- * IndexDeletionPolicy#onInit} or {@link
- * IndexDeletionPolicy#onCommit} method.
+ * Delete this commit point.
+ *
The writer locates all commits present in the index - * and calls this method. The policy may choose to delete - * commit points. To delete a commit point, call the - * {@link IndexCommitPoint#delete} method.
+ *The writer locates all index commits present in the + * index directory and calls this method. The policy may + * choose to delete some of the commit points, doing so by + * calling method {@link IndexCommitPoint#delete delete()} + * of {@link IndexCommitPoint}.
+ * + *Note: the last CommitPoint is the most recent one, + * i.e. the "front index state". Be careful not to delete it, + * unless you know for sure what you are doing, and unless + * you can afford to lose the index content while doing that. * - * @param commits List of {@link IndexCommitPoint}, + * @param commits List of current + * {@link IndexCommitPoint point-in-time commits}, * sorted by age (the 0th one is the oldest commit). */ public void onInit(List commits) throws IOException; /** - *
This is called each time the writer commits. This - * gives the policy a chance to remove old commit points + *
This is called each time the writer has completed a commit. + * This gives the policy a chance to remove old commit points * with each commit.
* + *The policy may now choose to delete old commit points + * by calling method {@link IndexCommitPoint#delete delete()} + * of {@link IndexCommitPoint}.
+ * *If writer has autoCommit = true
then
* this method will in general be called many times during
* one instance of {@link IndexWriter}. If
* autoCommit = false
then this method is
* only called once when {@link IndexWriter#close} is
* called, or not at all if the {@link IndexWriter#abort}
- * is called. The policy may now choose to delete old
- * commit points by calling {@link IndexCommitPoint#delete}.
+ * is called.
*
- * @param commits List of {@link IndexCommitPoint}>,
+ *
Note: the last CommitPoint is the most recent one,
+ * i.e. the "front index state". Be careful not to delete it,
+ * unless you know for sure what you are doing, and unless
+ * you can afford to lose the index content while doing that.
+ *
+ * @param commits List of {@link IndexCommitPoint},
* sorted by age (the 0th one is the oldest commit).
*/
public void onCommit(List commits) throws IOException;
diff --git a/src/java/org/apache/lucene/index/IndexFileDeleter.java b/src/java/org/apache/lucene/index/IndexFileDeleter.java
index 3cda1b5b524..2aa8ae4d4c7 100644
--- a/src/java/org/apache/lucene/index/IndexFileDeleter.java
+++ b/src/java/org/apache/lucene/index/IndexFileDeleter.java
@@ -33,20 +33,31 @@ import java.util.Collections;
/*
* This class keeps track of each SegmentInfos instance that
- * is still "live", either because it corresponds to a
- * segments_N in the Directory (a real commit) or because
- * it's the in-memory SegmentInfos that a writer is actively
- * updating but has not yet committed (currently this only
- * applies when autoCommit=false in IndexWriter). This
- * class uses simple reference counting to map the live
- * SegmentInfos instances to individual files in the
- * Directory.
+ * is still "live", either because it corresponds to a
+ * segments_N file in the Directory (a "commit", i.e. a
+ * committed SegmentInfos) or because it's the in-memory SegmentInfos
+ * that a writer is actively updating but has not yet committed
+ * (currently this only applies when autoCommit=false in IndexWriter).
+ * This class uses simple reference counting to map the live
+ * SegmentInfos instances to individual files in the Directory.
+ *
+ * The same directory file may be referenced by more than
+ * one IndexCommitPoint, i.e. more than one SegmentInfos.
+ * Therefore we count how many commits reference each file.
+ * When all the commits referencing a certain file have been
+ * deleted, the refcount for that file becomes zero, and the
+ * file is deleted.
*
* A separate deletion policy interface
* (IndexDeletionPolicy) is consulted on creation (onInit)
* and once per commit (onCommit), to decide when a commit
* should be removed.
*
+ * It is the business of the IndexDeletionPolicy to choose
+ * when to delete commit points. The actual mechanics of
+ * file deletion, retrying, etc., derived from the deletion
+ * of commit points are the business of the IndexFileDeleter.
+ *
* The current default deletion policy is {@link
* KeepOnlyLastCommitDeletionPolicy}, which removes all
* prior commits when a new commit has completed. This
@@ -64,8 +75,9 @@ final class IndexFileDeleter {
* so we will retry them again later: */
private List deletable;
- /* Reference count for all files in the index. Maps
- * String to RefCount (class below) instances: */
+ /* Reference count for all files in the index.
+ * Counts how many existing commits reference a file.
+ * Maps String to RefCount (class below) instances: */
private Map refCounts = new HashMap();
/* Holds all commits (segments_N) currently in the index.
@@ -79,8 +91,10 @@ final class IndexFileDeleter {
* non-commit checkpoint: */
private List lastFiles = new ArrayList();
+ /* Commits that the IndexDeletionPolicy has decided to delete: */
+ private List commitsToDelete = new ArrayList();
+
private PrintStream infoStream;
- private List toDelete = new ArrayList();
private Directory directory;
private IndexDeletionPolicy policy;
@@ -188,19 +202,19 @@ final class IndexFileDeleter {
}
/**
- * Remove the CommitPoints in the toDelete List by
+ * Remove the CommitPoints in the commitsToDelete List by
* DecRef'ing all files from each SegmentInfos.
*/
private void deleteCommits() throws IOException {
- int size = toDelete.size();
+ int size = commitsToDelete.size();
if (size > 0) {
// First decref all files that had been referred to by
// the now-deleted commits:
for(int i=0;i Note: if autoCommit=false
, flushed data would still
+ * not be visible to readers, until {@link #close} is called.
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
diff --git a/src/java/org/apache/lucene/index/SegmentInfo.java b/src/java/org/apache/lucene/index/SegmentInfo.java
index 2e236bbb53a..36a503f29f3 100644
--- a/src/java/org/apache/lucene/index/SegmentInfo.java
+++ b/src/java/org/apache/lucene/index/SegmentInfo.java
@@ -25,6 +25,12 @@ import java.util.List;
import java.util.ArrayList;
final class SegmentInfo {
+
+ static final int NO = -1; // e.g. no norms; no deletes;
+ static final int YES = 1; // e.g. have norms; have deletes;
+ static final int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions
+ static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
+
public String name; // unique name in dir
public int docCount; // number of docs in seg
public Directory dir; // where segment resides
@@ -32,17 +38,21 @@ final class SegmentInfo {
private boolean preLockless; // true if this is a segments file written before
// lock-less commits (2.1)
- private long delGen; // current generation of del file; -1 if there
- // are no deletes; 0 if it's a pre-2.1 segment
- // (and we must check filesystem); 1 or higher if
+ private long delGen; // current generation of del file; NO if there
+ // are no deletes; CHECK_DIR if it's a pre-2.1 segment
+ // (and we must check filesystem); YES or higher if
// there are deletes at generation N
- private long[] normGen; // current generations of each field's norm file.
- // If this array is null, we must check filesystem
- // when preLockLess is true. Else,
- // there are no separate norms
+ private long[] normGen; // current generation of each field's norm file.
+ // If this array is null, for lockLess this means no
+ // separate norms. For preLockLess this means we must
+ // check filesystem. If this array is not null, its
+ // values mean: NO says this field has no separate
+ // norms; CHECK_DIR says it is a preLockLess segment and
+ // filesystem must be checked; >= YES says this field
+ // has separate norms with the specified generation
- private byte isCompoundFile; // -1 if it is not; 1 if it is; 0 if it's
+ private byte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's
// pre-2.1 (ie, must check file system to see
// if