From b04a15016d437f0b574c93c52c4c5f41021fc379 Mon Sep 17 00:00:00 2001 From: Doron Cohen Date: Thu, 15 Mar 2007 19:22:26 +0000 Subject: [PATCH] LUCENE-710 followup: code cosmetics and added documentation. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@518734 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/IndexCommitPoint.java | 31 +++-- .../lucene/index/IndexDeletionPolicy.java | 44 +++++-- .../apache/lucene/index/IndexFileDeleter.java | 57 ++++++--- .../apache/lucene/index/IndexFileNames.java | 8 +- .../org/apache/lucene/index/IndexWriter.java | 29 ++++- .../org/apache/lucene/index/SegmentInfo.java | 120 ++++++++++-------- 6 files changed, 188 insertions(+), 101 deletions(-) diff --git a/src/java/org/apache/lucene/index/IndexCommitPoint.java b/src/java/org/apache/lucene/index/IndexCommitPoint.java index 49e171a5980..395a09db2cc 100644 --- a/src/java/org/apache/lucene/index/IndexCommitPoint.java +++ b/src/java/org/apache/lucene/index/IndexCommitPoint.java @@ -18,24 +18,37 @@ package org.apache.lucene.index; */ /** - * Represents a single commit into an index as seen by the - * {@link IndexDeletionPolicy}. + *

Expert: represents a single commit into an index as seen by the + * {@link IndexDeletionPolicy}. + *

+ * Changes to the content of an index are made visible only + * after the writer that made the change has written a new + * segments file (segments_N) to the directory. The point in + * time at which the writing of that new segments file to the + * directory completes is therefore an index commit point. + *

+ * Each index commit point has a unique segments file associated + * with it. The segments file associated with a later + * index commit point would have a larger N. */ public interface IndexCommitPoint { /** - * Get the segments file (ie, segments_N) of - * this commit point. + * Get the segments file (segments_N) associated + * with this commit point. */ public String getSegmentsFileName(); /** - * Notify the writer that this commit point should be - * deleted. This should only be called by the {@link - * IndexDeletionPolicy} during its {@link - * IndexDeletionPolicy#onInit} or {@link - * IndexDeletionPolicy#onCommit} method. + * Delete this commit point. + *

+ * Upon calling this, the writer is notified that this commit + * point should be deleted. + *

+ * The decision that a commit point should be deleted is taken by the {@link IndexDeletionPolicy} in effect, + * and therefore this should only be called by its {@link IndexDeletionPolicy#onInit onInit()} or + * {@link IndexDeletionPolicy#onCommit onCommit()} methods. */ public void delete(); } diff --git a/src/java/org/apache/lucene/index/IndexDeletionPolicy.java b/src/java/org/apache/lucene/index/IndexDeletionPolicy.java index 76750845399..8d4daabcfc6 100644 --- a/src/java/org/apache/lucene/index/IndexDeletionPolicy.java +++ b/src/java/org/apache/lucene/index/IndexDeletionPolicy.java @@ -21,10 +21,13 @@ import java.util.List; import java.io.IOException; /** - *

Expert: implement this interface, and pass it to one + *

Expert: policy for deletion of stale {@link IndexCommitPoint index commits}. + * + *

Implement this interface, and pass it to one * of the {@link IndexWriter} or {@link IndexReader} - * constructors, to customize when "point in time" commits - * are deleted from an index. The default deletion policy + * constructors, to customize when older + * {@link IndexCommitPoint point-in-time commits} + * are deleted from the index directory. The default deletion policy * is {@link KeepOnlyLastCommitDeletionPolicy}, which always * removes old commits as soon as a new commit is done (this * matches the behavior before 2.2).

@@ -52,31 +55,46 @@ public interface IndexDeletionPolicy { * instantiated to give the policy a chance to remove old * commit points.

* - *

The writer locates all commits present in the index - * and calls this method. The policy may choose to delete - * commit points. To delete a commit point, call the - * {@link IndexCommitPoint#delete} method.

+ *

The writer locates all index commits present in the + * index directory and calls this method. The policy may + * choose to delete some of the commit points, doing so by + * calling method {@link IndexCommitPoint#delete delete()} + * of {@link IndexCommitPoint}.

+ * + *

Note: the last CommitPoint is the most recent one, + * i.e. the "front index state". Be careful not to delete it, + * unless you know for sure what you are doing, and unless + * you can afford to lose the index content while doing that. * - * @param commits List of {@link IndexCommitPoint}, + * @param commits List of current + * {@link IndexCommitPoint point-in-time commits}, * sorted by age (the 0th one is the oldest commit). */ public void onInit(List commits) throws IOException; /** - *

This is called each time the writer commits. This - * gives the policy a chance to remove old commit points + *

This is called each time the writer has completed a commit. + * This gives the policy a chance to remove old commit points + * with each commit.

* + *

The policy may now choose to delete old commit points + * by calling method {@link IndexCommitPoint#delete delete()} + * of {@link IndexCommitPoint}.

+ * *

If writer has autoCommit = true then * this method will in general be called many times during * one instance of {@link IndexWriter}. If * autoCommit = false then this method is * only called once when {@link IndexWriter#close} is * called, or not at all if the {@link IndexWriter#abort} - * is called. The policy may now choose to delete old - * commit points by calling {@link IndexCommitPoint#delete}. + * is called. * - * @param commits List of {@link IndexCommitPoint}>, + *

Note: the last CommitPoint is the most recent one, + * i.e. the "front index state". Be careful not to delete it, + * unless you know for sure what you are doing, and unless + * you can afford to lose the index content while doing that. + * + * @param commits List of {@link IndexCommitPoint}, * sorted by age (the 0th one is the oldest commit). */ public void onCommit(List commits) throws IOException; diff --git a/src/java/org/apache/lucene/index/IndexFileDeleter.java b/src/java/org/apache/lucene/index/IndexFileDeleter.java index 3cda1b5b524..2aa8ae4d4c7 100644 --- a/src/java/org/apache/lucene/index/IndexFileDeleter.java +++ b/src/java/org/apache/lucene/index/IndexFileDeleter.java @@ -33,20 +33,31 @@ import java.util.Collections; /* * This class keeps track of each SegmentInfos instance that - * is still "live", either because it corresponds to a - * segments_N in the Directory (a real commit) or because - * it's the in-memory SegmentInfos that a writer is actively - * updating but has not yet committed (currently this only - * applies when autoCommit=false in IndexWriter). This - * class uses simple reference counting to map the live - * SegmentInfos instances to individual files in the - * Directory. + * is still "live", either because it corresponds to a + * segments_N file in the Directory (a "commit", i.e. a + * committed SegmentInfos) or because it's the in-memory SegmentInfos + * that a writer is actively updating but has not yet committed + * (currently this only applies when autoCommit=false in IndexWriter). + * This class uses simple reference counting to map the live + * SegmentInfos instances to individual files in the Directory. + * + * The same directory file may be referenced by more than + * one IndexCommitPoints, i.e. more than one SegmentInfos. + * Therefore we count how many commits reference each file. + * When all the commits referencing a certain file have been + * deleted, the refcount for that file becomes zero, and the + * file is deleted. 
* * A separate deletion policy interface * (IndexDeletionPolicy) is consulted on creation (onInit) * and once per commit (onCommit), to decide when a commit * should be removed. * + * It is the business of the IndexDeletionPolicy to choose + * when to delete commit points. The actual mechanics of + * file deletion, retrying, etc, derived from the deletion + * of commit points is the business of the IndexFileDeleter. + * * The current default deletion policy is {@link * KeepOnlyLastCommitDeletionPolicy}, which removes all * prior commits when a new commit has completed. This @@ -64,8 +75,9 @@ final class IndexFileDeleter { * so we will retry them again later: */ private List deletable; - /* Reference count for all files in the index. Maps - * String to RefCount (class below) instances: */ + /* Reference count for all files in the index. + * Counts how many existing commits reference a file. + * Maps String to RefCount (class below) instances: */ private Map refCounts = new HashMap(); /* Holds all commits (segments_N) currently in the index. @@ -79,8 +91,10 @@ final class IndexFileDeleter { * non-commit checkpoint: */ private List lastFiles = new ArrayList(); + /* Commits that the IndexDeletionPolicy has decided to delete: */ + private List commitsToDelete = new ArrayList(); + private PrintStream infoStream; - private List toDelete = new ArrayList(); private Directory directory; private IndexDeletionPolicy policy; @@ -188,19 +202,19 @@ final class IndexFileDeleter { } /** - * Remove the CommitPoints in the toDelete List by + * Remove the CommitPoints in the commitsToDelete List by * DecRef'ing all files from each SegmentInfos. 
*/ private void deleteCommits() throws IOException { - int size = toDelete.size(); + int size = commitsToDelete.size(); if (size > 0) { // First decref all files that had been referred to by // the now-deleted commits: for(int i=0;i */ +/* + * Clarification: Check Points (and commits) + * Being able to set autoCommit=false allows IndexWriter to flush and + * write new index files to the directory without writing a new segments_N + * file which references these new files. It also means that the state of + * the in memory SegmentInfos object is different than the most recent + * segments_N file written to the directory. + * + * Each time the SegmentInfos is changed, and matches the (possibly + * modified) directory files, we have a new "check point". + * If the modified/new SegmentInfos is written to disk - as a new + * (generation of) segments_N file - this check point is also an + * IndexCommitPoint. + * + * With autoCommit=true, every checkPoint is also a CommitPoint. + * With autoCommit=false, some checkPoints may not be commits. + * + * A new checkpoint always replaces the previous checkpoint and + * becomes the new "front" of the index. This allows the IndexFileDeleter + * to delete files that are referenced only by stale checkpoints. + * (files that were created since the last commit, but are no longer + * referenced by the "front" of the index). For this, IndexFileDeleter + * keeps track of the last non commit checkpoint. + */ public class IndexWriter { /** @@ -1427,7 +1451,6 @@ public class IndexWriter { flushRamSegments(); // 2 copy segment infos and find the highest level from dirs - int start = segmentInfos.size(); int startUpperBound = minMergeDocs; boolean success = false; @@ -1655,7 +1678,9 @@ public class IndexWriter { /** * Flush all in-memory buffered updates (adds and deletes) - * to the Directory. + * to the Directory. + *

Note: if autoCommit=false, flushed data would still + * not be visible to readers, until {@link #close} is called. * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ diff --git a/src/java/org/apache/lucene/index/SegmentInfo.java b/src/java/org/apache/lucene/index/SegmentInfo.java index 2e236bbb53a..36a503f29f3 100644 --- a/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/src/java/org/apache/lucene/index/SegmentInfo.java @@ -25,6 +25,12 @@ import java.util.List; import java.util.ArrayList; final class SegmentInfo { + + static final int NO = -1; // e.g. no norms; no deletes; + static final int YES = 1; // e.g. have norms; have deletes; + static final int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions + static final int WITHOUT_GEN = 0; // a file name that has no GEN in it. + public String name; // unique name in dir public int docCount; // number of docs in seg public Directory dir; // where segment resides @@ -32,17 +38,21 @@ final class SegmentInfo { private boolean preLockless; // true if this is a segments file written before // lock-less commits (2.1) - private long delGen; // current generation of del file; -1 if there - // are no deletes; 0 if it's a pre-2.1 segment - // (and we must check filesystem); 1 or higher if + private long delGen; // current generation of del file; NO if there + // are no deletes; CHECK_DIR if it's a pre-2.1 segment + // (and we must check filesystem); YES or higher if // there are deletes at generation N - private long[] normGen; // current generations of each field's norm file. - // If this array is null, we must check filesystem - // when preLockLess is true. Else, - // there are no separate norms + private long[] normGen; // current generation of each field's norm file. + // If this array is null, for lockLess this means no + // separate norms. For preLockLess this means we must + // check filesystem. 
If this array is not null, its + // values mean: NO says this field has no separate + // norms; CHECK_DIR says it is a preLockLess segment and + // filesystem must be checked; >= YES says this field + // has separate norms with the specified generation - private byte isCompoundFile; // -1 if it is not; 1 if it is; 0 if it's + private byte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's // pre-2.1 (ie, must check file system to see // if .cfs and .nrm exist) @@ -59,15 +69,15 @@ final class SegmentInfo { this.name = name; this.docCount = docCount; this.dir = dir; - delGen = -1; - isCompoundFile = 0; + delGen = NO; + isCompoundFile = CHECK_DIR; preLockless = true; hasSingleNormFile = false; } public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) { this(name, docCount, dir); - this.isCompoundFile = (byte) (isCompoundFile ? 1 : -1); + this.isCompoundFile = (byte) (isCompoundFile ? YES : NO); this.hasSingleNormFile = hasSingleNormFile; preLockless = false; } @@ -112,7 +122,7 @@ final class SegmentInfo { hasSingleNormFile = false; } int numNormGen = input.readInt(); - if (numNormGen == -1) { + if (numNormGen == NO) { normGen = null; } else { normGen = new long[numNormGen]; @@ -121,11 +131,11 @@ final class SegmentInfo { } } isCompoundFile = input.readByte(); - preLockless = isCompoundFile == 0; + preLockless = (isCompoundFile == CHECK_DIR); } else { - delGen = 0; + delGen = CHECK_DIR; normGen = null; - isCompoundFile = 0; + isCompoundFile = CHECK_DIR; preLockless = true; hasSingleNormFile = false; } @@ -138,11 +148,15 @@ final class SegmentInfo { // norms set against it yet: normGen = new long[numFields]; - if (!preLockless) { + if (preLockless) { + // Do nothing: thus leaving normGen[k]==CHECK_DIR (==0), so that later we know + // we have to check filesystem for norm files, because this is prelockless. 
+ + } else { // This is a FORMAT_LOCKLESS segment, which means // there are no separate norms: for(int i=0;i 0: this means this segment was written by + // delGen >= YES: this means this segment was written by // the LOCKLESS code and for certain has // deletions // - if (delGen == -1) { + if (delGen == NO) { return false; - } else if (delGen > 0) { + } else if (delGen >= YES) { return true; } else { return dir.fileExists(getDelFileName()); @@ -175,8 +189,8 @@ final class SegmentInfo { void advanceDelGen() { // delGen 0 is reserved for pre-LOCKLESS format - if (delGen == -1) { - delGen = 1; + if (delGen == NO) { + delGen = YES; } else { delGen++; } @@ -184,7 +198,7 @@ final class SegmentInfo { } void clearDelGen() { - delGen = -1; + delGen = NO; files = null; } @@ -201,13 +215,13 @@ final class SegmentInfo { } String getDelFileName() { - if (delGen == -1) { + if (delGen == NO) { // In this case we know there is no deletion filename // against this segment return null; } else { - // If delGen is 0, it's the pre-lockless-commit file format - return IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen); + // If delGen is CHECK_DIR, it's the pre-lockless-commit file format + return IndexFileNames.fileNameFromGeneration(name, "." 
+ IndexFileNames.DELETES_EXTENSION, delGen); } } @@ -218,11 +232,11 @@ final class SegmentInfo { */ boolean hasSeparateNorms(int fieldNumber) throws IOException { - if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == 0)) { + if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR)) { // Must fallback to directory file exists check: String fileName = name + ".s" + fieldNumber; return dir.fileExists(fileName); - } else if (normGen == null || normGen[fieldNumber] == -1) { + } else if (normGen == null || normGen[fieldNumber] == NO) { return false; } else { return true; @@ -258,17 +272,17 @@ final class SegmentInfo { } } else { // This means this segment was saved with LOCKLESS - // code so we first check whether any normGen's are > - // 0 (meaning they definitely have separate norms): + // code so we first check whether any normGen's are >= 1 + // (meaning they definitely have separate norms): for(int i=0;i 0) { + if (normGen[i] >= YES) { return true; } } // Next we look for any == 0. These cases were // pre-LOCKLESS and must be checked in directory: for(int i=0;i 0 || dir.fileExists(delFileName))) { + if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) { files.add(delFileName); } - // Careful logic for norms files: + // Careful logic for norms files if (normGen != null) { for(int i=0;i 0) { + if (gen >= YES) { // Definitely a separate norm file, with generation: files.add(IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen)); - } else if (-1 == gen) { - // No separate norms but maybe non-separate norms + } else if (NO == gen) { + // No separate norms but maybe plain norms // in the non compound file case: if (!hasSingleNormFile && !useCompoundFile) { - String fileName = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION + i; + String fileName = name + "." 
+ IndexFileNames.PLAIN_NORMS_EXTENSION + i; if (dir.fileExists(fileName)) { files.add(fileName); } } - } else if (0 == gen) { + } else if (CHECK_DIR == gen) { // Pre-2.1: we have to check file existence String fileName = null; if (useCompoundFile) { fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i; } else if (!hasSingleNormFile) { - fileName = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION + i; + fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i; } if (fileName != null && dir.fileExists(fileName)) { files.add(fileName); @@ -445,7 +459,7 @@ final class SegmentInfo { if (useCompoundFile) prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION; else - prefix = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION; + prefix = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION; int prefixLength = prefix.length(); String[] allFiles = dir.list(); if (allFiles == null)