LUCENE-710 followup: code cosmetics and added documentation.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@518734 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doron Cohen 2007-03-15 19:22:26 +00:00
parent a6d56d5eb9
commit b04a15016d
6 changed files with 188 additions and 101 deletions

View File

@ -18,24 +18,37 @@ package org.apache.lucene.index;
*/
/**
* Represents a single commit into an index as seen by the
* {@link IndexDeletionPolicy}.
* <p>Expert: represents a single commit into an index as seen by the
* {@link IndexDeletionPolicy}.
* <p>
* Changes to the content of an index are made visible only
* after the writer who made that change had written to the
* directory a new segments file (<code>segments_N</code>). This point in
* time, when the action of writing of a new segments file to the
* directory is completed, is therefore an index commit point.
* <p>
* Each index commit point has a unique segments file associated
* with it. The segments file associated with a later
* index commit point would have a larger N.
*/
public interface IndexCommitPoint {
/**
* Get the segments file (ie, <code>segments_N</code>) of
* this commit point.
* Get the segments file (<code>segments_N</code>) associated
* with this commit point.
*/
public String getSegmentsFileName();
/**
* Notify the writer that this commit point should be
* deleted. This should only be called by the {@link
* IndexDeletionPolicy} during its {@link
* IndexDeletionPolicy#onInit} or {@link
* IndexDeletionPolicy#onCommit} method.
* Delete this commit point.
* <p>
* Upon calling this, the writer is notified that this commit
* point should be deleted.
* <p>
* Decision that a commit-point should be deleted is taken by the {@link IndexDeletionPolicy} in effect
* and therefore this should only be called by its {@link IndexDeletionPolicy#onInit onInit()} or
* {@link IndexDeletionPolicy#onCommit onCommit()} methods.
*/
public void delete();
}

View File

@ -21,10 +21,13 @@ import java.util.List;
import java.io.IOException;
/**
* <p>Expert: implement this interface, and pass it to one
* <p>Expert: policy for deletion of stale {@link IndexCommitPoint index commits}.
*
* <p>Implement this interface, and pass it to one
* of the {@link IndexWriter} or {@link IndexReader}
* constructors, to customize when "point in time" commits
* are deleted from an index. The default deletion policy
* constructors, to customize when older
* {@link IndexCommitPoint point-in-time commits}
* are deleted from the index directory. The default deletion policy
* is {@link KeepOnlyLastCommitDeletionPolicy}, which always
* removes old commits as soon as a new commit is done (this
* matches the behavior before 2.2).</p>
@ -52,31 +55,46 @@ public interface IndexDeletionPolicy {
* instantiated to give the policy a chance to remove old
* commit points.</p>
*
* <p>The writer locates all commits present in the index
* and calls this method. The policy may choose to delete
* commit points. To delete a commit point, call the
* {@link IndexCommitPoint#delete} method.</p>
* <p>The writer locates all index commits present in the
* index directory and calls this method. The policy may
* choose to delete some of the commit points, doing so by
* calling method {@link IndexCommitPoint#delete delete()}
* of {@link IndexCommitPoint}.</p>
*
* <p><u>Note:</u> the last CommitPoint is the most recent one,
* i.e. the "front index state". Be careful not to delete it,
* unless you know for sure what you are doing, and unless
* you can afford to lose the index content while doing that.
*
* @param commits List of {@link IndexCommitPoint},
* @param commits List of current
* {@link IndexCommitPoint point-in-time commits},
* sorted by age (the 0th one is the oldest commit).
*/
public void onInit(List commits) throws IOException;
/**
* <p>This is called each time the writer commits. This
* gives the policy a chance to remove old commit points
* <p>This is called each time the writer completed a commit.
* This gives the policy a chance to remove old commit points
* with each commit.</p>
*
* <p>The policy may now choose to delete old commit points
* by calling method {@link IndexCommitPoint#delete delete()}
* of {@link IndexCommitPoint}.</p>
*
* <p>If writer has <code>autoCommit = true</code> then
* this method will in general be called many times during
* one instance of {@link IndexWriter}. If
* <code>autoCommit = false</code> then this method is
* only called once when {@link IndexWriter#close} is
* called, or not at all if the {@link IndexWriter#abort}
* is called. The policy may now choose to delete old
* commit points by calling {@link IndexCommitPoint#delete}.
* is called.
*
* @param commits List of {@link IndexCommitPoint}>,
* <p><u>Note:</u> the last CommitPoint is the most recent one,
* i.e. the "front index state". Be careful not to delete it,
* unless you know for sure what you are doing, and unless
* you can afford to lose the index content while doing that.
*
* @param commits List of {@link IndexCommitPoint},
* sorted by age (the 0th one is the oldest commit).
*/
public void onCommit(List commits) throws IOException;

View File

@ -33,20 +33,31 @@ import java.util.Collections;
/*
* This class keeps track of each SegmentInfos instance that
* is still "live", either because it corresponds to a
* segments_N in the Directory (a real commit) or because
* it's the in-memory SegmentInfos that a writer is actively
* updating but has not yet committed (currently this only
* applies when autoCommit=false in IndexWriter). This
* class uses simple reference counting to map the live
* SegmentInfos instances to individual files in the
* Directory.
* is still "live", either because it corresponds to a
* segments_N file in the Directory (a "commit", i.e. a
* committed SegmentInfos) or because it's the in-memory SegmentInfos
* that a writer is actively updating but has not yet committed
* (currently this only applies when autoCommit=false in IndexWriter).
* This class uses simple reference counting to map the live
* SegmentInfos instances to individual files in the Directory.
*
* The same directory file may be referenced by more than
* one IndexCommitPoints, i.e. more than one SegmentInfos.
* Therefore we count how many commits reference each file.
* When all the commits referencing a certain file have been
* deleted, the refcount for that file becomes zero, and the
* file is deleted.
*
* A separate deletion policy interface
* (IndexDeletionPolicy) is consulted on creation (onInit)
* and once per commit (onCommit), to decide when a commit
* should be removed.
*
* It is the business of the IndexDeletionPolicy to choose
* when to delete commit points. The actual mechanics of
* file deletion, retrying, etc, derived from the deletion
* of commit points is the business of the IndexFileDeleter.
*
* The current default deletion policy is {@link
* KeepOnlyLastCommitDeletionPolicy}, which removes all
* prior commits when a new commit has completed. This
@ -64,8 +75,9 @@ final class IndexFileDeleter {
* so we will retry them again later: */
private List deletable;
/* Reference count for all files in the index. Maps
* String to RefCount (class below) instances: */
/* Reference count for all files in the index.
* Counts how many existing commits reference a file.
* Maps String to RefCount (class below) instances: */
private Map refCounts = new HashMap();
/* Holds all commits (segments_N) currently in the index.
@ -79,8 +91,10 @@ final class IndexFileDeleter {
* non-commit checkpoint: */
private List lastFiles = new ArrayList();
/* Commits that the IndexDeletionPolicy have decided to delete: */
private List commitsToDelete = new ArrayList();
private PrintStream infoStream;
private List toDelete = new ArrayList();
private Directory directory;
private IndexDeletionPolicy policy;
@ -188,19 +202,19 @@ final class IndexFileDeleter {
}
/**
* Remove the CommitPoints in the toDelete List by
* Remove the CommitPoints in the commitsToDelete List by
* DecRef'ing all files from each SegmentInfos.
*/
private void deleteCommits() throws IOException {
int size = toDelete.size();
int size = commitsToDelete.size();
if (size > 0) {
// First decref all files that had been referred to by
// the now-deleted commits:
for(int i=0;i<size;i++) {
CommitPoint commit = (CommitPoint) toDelete.get(i);
CommitPoint commit = (CommitPoint) commitsToDelete.get(i);
if (infoStream != null) {
message("deleteCommits: now remove commit \"" + commit.getSegmentsFileName() + "\"");
}
@ -210,9 +224,9 @@ final class IndexFileDeleter {
}
decRef(commit.getSegmentsFileName());
}
toDelete.clear();
commitsToDelete.clear();
// Now compact commits to remove deleted ones:
// Now compact commits to remove deleted ones (preserving the sort):
size = commits.size();
int readFrom = 0;
int writeTo = 0;
@ -258,6 +272,9 @@ final class IndexFileDeleter {
}
/**
* For definition of "check point" see IndexWriter comments:
* "Clarification: Check Points (and commits)".
*
* Writer calls this when it has made a "consistent
* change" to the index, meaning new files are written to
* the index and the in-memory SegmentInfos have been
@ -422,10 +439,10 @@ final class IndexFileDeleter {
public void deleteDirect(Directory otherDir, List segments) throws IOException {
int size = segments.size();
for(int i=0;i<size;i++) {
List toDelete = ((SegmentInfo) segments.get(i)).files();
int size2 = toDelete.size();
List filestoDelete = ((SegmentInfo) segments.get(i)).files();
int size2 = filestoDelete.size();
for(int j=0;j<size2;j++) {
otherDir.deleteFile((String) toDelete.get(j));
otherDir.deleteFile((String) filestoDelete.get(j));
}
}
}
@ -487,7 +504,7 @@ final class IndexFileDeleter {
public void delete() {
if (!deleted) {
deleted = true;
toDelete.add(this);
commitsToDelete.add(this);
}
}

View File

@ -44,8 +44,8 @@ final class IndexFileNames {
/** Extension of deletes */
static final String DELETES_EXTENSION = "del";
/** Extension of single norms */
static final String SINGLE_NORMS_EXTENSION = "f";
/** Extension of plain norms */
static final String PLAIN_NORMS_EXTENSION = "f";
/** Extension of separate norms */
static final String SEPARATE_NORMS_EXTENSION = "s";
@ -91,9 +91,9 @@ final class IndexFileNames {
* @param gen -- generation
*/
static final String fileNameFromGeneration(String base, String extension, long gen) {
if (gen == -1) {
if (gen == SegmentInfo.NO) {
return null;
} else if (gen == 0) {
} else if (gen == SegmentInfo.WITHOUT_GEN) {
return base + extension;
} else {
return base + "_" + Long.toString(gen, Character.MAX_RADIX) + extension;

View File

@ -126,6 +126,30 @@ import java.util.Map.Entry;
normally relies on. </p>
*/
/*
* Clarification: Check Points (and commits)
* Being able to set autoCommit=false allows IndexWriter to flush and
* write new index files to the directory without writing a new segments_N
* file which references these new files. It also means that the state of
* the in memory SegmentInfos object is different than the most recent
* segments_N file written to the directory.
*
* Each time the SegmentInfos is changed, and matches the (possibly
* modified) directory files, we have a new "check point".
* If the modified/new SegmentInfos is written to disk - as a new
* (generation of) segments_N file - this check point is also an
* IndexCommitPoint.
*
* With autoCommit=true, every checkPoint is also a CommitPoint.
* With autoCommit=false, some checkPoints may not be commits.
*
* A new checkpoint always replaces the previous checkpoint and
* becomes the new "front" of the index. This allows the IndexFileDeleter
* to delete files that are referenced only by stale checkpoints.
* (files that were created since the last commit, but are no longer
* referenced by the "front" of the index). For this, IndexFileDeleter
* keeps track of the last non commit checkpoint.
*/
public class IndexWriter {
/**
@ -1427,7 +1451,6 @@ public class IndexWriter {
flushRamSegments();
// 2 copy segment infos and find the highest level from dirs
int start = segmentInfos.size();
int startUpperBound = minMergeDocs;
boolean success = false;
@ -1655,7 +1678,9 @@ public class IndexWriter {
/**
* Flush all in-memory buffered updates (adds and deletes)
* to the Directory.
* to the Directory.
* <p>Note: if <code>autoCommit=false</code>, flushed data would still
* not be visible to readers, until {@link #close} is called.
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/

View File

@ -25,6 +25,12 @@ import java.util.List;
import java.util.ArrayList;
final class SegmentInfo {
static final int NO = -1; // e.g. no norms; no deletes;
static final int YES = 1; // e.g. have norms; have deletes;
static final int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions
static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
public String name; // unique name in dir
public int docCount; // number of docs in seg
public Directory dir; // where segment resides
@ -32,17 +38,21 @@ final class SegmentInfo {
private boolean preLockless; // true if this is a segments file written before
// lock-less commits (2.1)
private long delGen; // current generation of del file; -1 if there
// are no deletes; 0 if it's a pre-2.1 segment
// (and we must check filesystem); 1 or higher if
private long delGen; // current generation of del file; NO if there
// are no deletes; CHECK_DIR if it's a pre-2.1 segment
// (and we must check filesystem); YES or higher if
// there are deletes at generation N
private long[] normGen; // current generations of each field's norm file.
// If this array is null, we must check filesystem
// when preLockLess is true. Else,
// there are no separate norms
private long[] normGen; // current generation of each field's norm file.
// If this array is null, for lockLess this means no
// separate norms. For preLockLess this means we must
// check filesystem. If this array is not null, its
// values mean: NO says this field has no separate
// norms; CHECK_DIR says it is a preLockLess segment and
// filesystem must be checked; >= YES says this field
// has separate norms with the specified generation
private byte isCompoundFile; // -1 if it is not; 1 if it is; 0 if it's
private byte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's
// pre-2.1 (ie, must check file system to see
// if <name>.cfs and <name>.nrm exist)
@ -59,15 +69,15 @@ final class SegmentInfo {
this.name = name;
this.docCount = docCount;
this.dir = dir;
delGen = -1;
isCompoundFile = 0;
delGen = NO;
isCompoundFile = CHECK_DIR;
preLockless = true;
hasSingleNormFile = false;
}
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) {
this(name, docCount, dir);
this.isCompoundFile = (byte) (isCompoundFile ? 1 : -1);
this.isCompoundFile = (byte) (isCompoundFile ? YES : NO);
this.hasSingleNormFile = hasSingleNormFile;
preLockless = false;
}
@ -112,7 +122,7 @@ final class SegmentInfo {
hasSingleNormFile = false;
}
int numNormGen = input.readInt();
if (numNormGen == -1) {
if (numNormGen == NO) {
normGen = null;
} else {
normGen = new long[numNormGen];
@ -121,11 +131,11 @@ final class SegmentInfo {
}
}
isCompoundFile = input.readByte();
preLockless = isCompoundFile == 0;
preLockless = (isCompoundFile == CHECK_DIR);
} else {
delGen = 0;
delGen = CHECK_DIR;
normGen = null;
isCompoundFile = 0;
isCompoundFile = CHECK_DIR;
preLockless = true;
hasSingleNormFile = false;
}
@ -138,11 +148,15 @@ final class SegmentInfo {
// norms set against it yet:
normGen = new long[numFields];
if (!preLockless) {
if (preLockless) {
// Do nothing: thus leaving normGen[k]==CHECK_DIR (==0), so that later we know
// we have to check filesystem for norm files, because this is prelockless.
} else {
// This is a FORMAT_LOCKLESS segment, which means
// there are no separate norms:
for(int i=0;i<numFields;i++) {
normGen[i] = -1;
normGen[i] = NO;
}
}
}
@ -152,21 +166,21 @@ final class SegmentInfo {
throws IOException {
// Cases:
//
// delGen == -1: this means this segment was written
// delGen == NO: this means this segment was written
// by the LOCKLESS code and for certain does not have
// deletions yet
//
// delGen == 0: this means this segment was written by
// delGen == CHECK_DIR: this means this segment was written by
// pre-LOCKLESS code which means we must check
// directory to see if .del file exists
//
// delGen > 0: this means this segment was written by
// delGen >= YES: this means this segment was written by
// the LOCKLESS code and for certain has
// deletions
//
if (delGen == -1) {
if (delGen == NO) {
return false;
} else if (delGen > 0) {
} else if (delGen >= YES) {
return true;
} else {
return dir.fileExists(getDelFileName());
@ -175,8 +189,8 @@ final class SegmentInfo {
void advanceDelGen() {
// delGen 0 is reserved for pre-LOCKLESS format
if (delGen == -1) {
delGen = 1;
if (delGen == NO) {
delGen = YES;
} else {
delGen++;
}
@ -184,7 +198,7 @@ final class SegmentInfo {
}
void clearDelGen() {
delGen = -1;
delGen = NO;
files = null;
}
@ -201,13 +215,13 @@ final class SegmentInfo {
}
String getDelFileName() {
if (delGen == -1) {
if (delGen == NO) {
// In this case we know there is no deletion filename
// against this segment
return null;
} else {
// If delGen is 0, it's the pre-lockless-commit file format
return IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
// If delGen is CHECK_DIR, it's the pre-lockless-commit file format
return IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
}
}
@ -218,11 +232,11 @@ final class SegmentInfo {
*/
boolean hasSeparateNorms(int fieldNumber)
throws IOException {
if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == 0)) {
if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR)) {
// Must fallback to directory file exists check:
String fileName = name + ".s" + fieldNumber;
return dir.fileExists(fileName);
} else if (normGen == null || normGen[fieldNumber] == -1) {
} else if (normGen == null || normGen[fieldNumber] == NO) {
return false;
} else {
return true;
@ -258,17 +272,17 @@ final class SegmentInfo {
}
} else {
// This means this segment was saved with LOCKLESS
// code so we first check whether any normGen's are >
// 0 (meaning they definitely have separate norms):
// code so we first check whether any normGen's are >= 1
// (meaning they definitely have separate norms):
for(int i=0;i<normGen.length;i++) {
if (normGen[i] > 0) {
if (normGen[i] >= YES) {
return true;
}
}
// Next we look for any == 0. These cases were
// pre-LOCKLESS and must be checked in directory:
for(int i=0;i<normGen.length;i++) {
if (normGen[i] == 0) {
if (normGen[i] == CHECK_DIR) {
if (hasSeparateNorms(i)) {
return true;
}
@ -286,8 +300,8 @@ final class SegmentInfo {
* @param fieldIndex field whose norm file will be rewritten
*/
void advanceNormGen(int fieldIndex) {
if (normGen[fieldIndex] == -1) {
normGen[fieldIndex] = 1;
if (normGen[fieldIndex] == NO) {
normGen[fieldIndex] = YES;
} else {
normGen[fieldIndex]++;
}
@ -304,7 +318,7 @@ final class SegmentInfo {
long gen;
if (normGen == null) {
gen = 0;
gen = CHECK_DIR;
} else {
gen = normGen[number];
}
@ -318,12 +332,12 @@ final class SegmentInfo {
if (hasSingleNormFile) {
// case 2: lockless (or nrm file exists) - single file for all norms
prefix = "." + IndexFileNames.NORMS_EXTENSION;
return IndexFileNames.fileNameFromGeneration(name, prefix, 0);
return IndexFileNames.fileNameFromGeneration(name, prefix, WITHOUT_GEN);
}
// case 3: norm file for each field
prefix = ".f";
return IndexFileNames.fileNameFromGeneration(name, prefix + number, 0);
return IndexFileNames.fileNameFromGeneration(name, prefix + number, WITHOUT_GEN);
}
/**
@ -334,9 +348,9 @@ final class SegmentInfo {
*/
void setUseCompoundFile(boolean isCompoundFile) {
if (isCompoundFile) {
this.isCompoundFile = 1;
this.isCompoundFile = YES;
} else {
this.isCompoundFile = -1;
this.isCompoundFile = NO;
}
files = null;
}
@ -346,9 +360,9 @@ final class SegmentInfo {
* file; else, false.
*/
boolean getUseCompoundFile() throws IOException {
if (isCompoundFile == -1) {
if (isCompoundFile == NO) {
return false;
} else if (isCompoundFile == 1) {
} else if (isCompoundFile == YES) {
return true;
} else {
return dir.fileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
@ -365,7 +379,7 @@ final class SegmentInfo {
output.writeLong(delGen);
output.writeByte((byte) (hasSingleNormFile ? 1:0));
if (normGen == null) {
output.writeInt(-1);
output.writeInt(NO);
} else {
output.writeInt(normGen.length);
for(int j = 0; j < normGen.length; j++) {
@ -405,33 +419,33 @@ final class SegmentInfo {
}
String delFileName = IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
if (delFileName != null && (delGen > 0 || dir.fileExists(delFileName))) {
if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) {
files.add(delFileName);
}
// Careful logic for norms files:
// Careful logic for norms files
if (normGen != null) {
for(int i=0;i<normGen.length;i++) {
long gen = normGen[i];
if (gen > 0) {
if (gen >= YES) {
// Definitely a separate norm file, with generation:
files.add(IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
} else if (-1 == gen) {
// No separate norms but maybe non-separate norms
} else if (NO == gen) {
// No separate norms but maybe plain norms
// in the non compound file case:
if (!hasSingleNormFile && !useCompoundFile) {
String fileName = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION + i;
String fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
if (dir.fileExists(fileName)) {
files.add(fileName);
}
}
} else if (0 == gen) {
} else if (CHECK_DIR == gen) {
// Pre-2.1: we have to check file existence
String fileName = null;
if (useCompoundFile) {
fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i;
} else if (!hasSingleNormFile) {
fileName = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION + i;
fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
}
if (fileName != null && dir.fileExists(fileName)) {
files.add(fileName);
@ -445,7 +459,7 @@ final class SegmentInfo {
if (useCompoundFile)
prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION;
else
prefix = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION;
prefix = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION;
int prefixLength = prefix.length();
String[] allFiles = dir.list();
if (allFiles == null)