mirror of https://github.com/apache/lucene.git
LUCENE-702: make sure addIndexes(*) does not corrupt index on disk full; change to transactional semantics; fix IndexWriter.mergeSegments and IndexReader.commit to clean up (and leave instance consistent) on exception
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@488330 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0d8f7b704a
commit
d6823ef374
|
@ -240,6 +240,15 @@ Bug fixes
|
||||||
"The handle is invalid" IOExceptions on Windows when trying to
|
"The handle is invalid" IOExceptions on Windows when trying to
|
||||||
close readers or writers. (Michael Busch via Mike McCandless).
|
close readers or writers. (Michael Busch via Mike McCandless).
|
||||||
|
|
||||||
|
26. LUCENE-702: Fix IndexWriter.addIndexes(*) to not corrupt the index
|
||||||
|
on any exceptions (eg disk full). The semantics of these methods
|
||||||
|
is now transactional: either all indices are merged or none are.
|
||||||
|
Also fixed IndexWriter.mergeSegments (called outside of
|
||||||
|
addIndexes(*) by addDocument, optimize, flushRamSegments) and
|
||||||
|
IndexReader.commit() (called by close) to clean up and keep the
|
||||||
|
instance state consistent to what's actually in the index (Mike
|
||||||
|
McCandless).
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
1. LUCENE-586: TermDocs.skipTo() is now more efficient for
|
1. LUCENE-586: TermDocs.skipTo() is now more efficient for
|
||||||
|
|
|
@ -26,6 +26,8 @@ import java.io.IOException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.Vector;
|
import java.util.Vector;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A utility class (used by both IndexReader and
|
* A utility class (used by both IndexReader and
|
||||||
|
@ -35,7 +37,7 @@ import java.util.HashMap;
|
||||||
*/
|
*/
|
||||||
public class IndexFileDeleter {
|
public class IndexFileDeleter {
|
||||||
private Vector deletable;
|
private Vector deletable;
|
||||||
private Vector pending;
|
private HashSet pending;
|
||||||
private Directory directory;
|
private Directory directory;
|
||||||
private SegmentInfos segmentInfos;
|
private SegmentInfos segmentInfos;
|
||||||
private PrintStream infoStream;
|
private PrintStream infoStream;
|
||||||
|
@ -45,6 +47,12 @@ public class IndexFileDeleter {
|
||||||
this.segmentInfos = segmentInfos;
|
this.segmentInfos = segmentInfos;
|
||||||
this.directory = directory;
|
this.directory = directory;
|
||||||
}
|
}
|
||||||
|
void setSegmentInfos(SegmentInfos segmentInfos) {
|
||||||
|
this.segmentInfos = segmentInfos;
|
||||||
|
}
|
||||||
|
SegmentInfos getSegmentInfos() {
|
||||||
|
return segmentInfos;
|
||||||
|
}
|
||||||
|
|
||||||
void setInfoStream(PrintStream infoStream) {
|
void setInfoStream(PrintStream infoStream) {
|
||||||
this.infoStream = infoStream;
|
this.infoStream = infoStream;
|
||||||
|
@ -134,6 +142,10 @@ public class IndexFileDeleter {
|
||||||
// This is an orphan'd separate norms file:
|
// This is an orphan'd separate norms file:
|
||||||
doDelete = true;
|
doDelete = true;
|
||||||
}
|
}
|
||||||
|
} else if ("cfs".equals(extension) && !info.getUseCompoundFile()) {
|
||||||
|
// This is a partially written
|
||||||
|
// _segmentName.cfs:
|
||||||
|
doDelete = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -168,6 +180,30 @@ public class IndexFileDeleter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete these segments, as long as they are not listed
|
||||||
|
* in protectedSegments. If they are, then, instead, add
|
||||||
|
* them to the pending set.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public final void deleteSegments(Vector segments, HashSet protectedSegments) throws IOException {
|
||||||
|
|
||||||
|
deleteFiles(); // try to delete files that we couldn't before
|
||||||
|
|
||||||
|
for (int i = 0; i < segments.size(); i++) {
|
||||||
|
SegmentReader reader = (SegmentReader)segments.elementAt(i);
|
||||||
|
if (reader.directory() == this.directory) {
|
||||||
|
if (protectedSegments.contains(reader.getSegmentName())) {
|
||||||
|
addPendingFiles(reader.files()); // record these for deletion on commit
|
||||||
|
} else {
|
||||||
|
deleteFiles(reader.files()); // try to delete our files
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
deleteFiles(reader.files(), reader.directory()); // delete other files
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public final void deleteFiles(Vector files, Directory directory)
|
public final void deleteFiles(Vector files, Directory directory)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
for (int i = 0; i < files.size(); i++)
|
for (int i = 0; i < files.size(); i++)
|
||||||
|
@ -199,22 +235,51 @@ public class IndexFileDeleter {
|
||||||
pending = null;
|
pending = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
final void addPendingFile(String fileName) {
|
/*
|
||||||
if (pending == null) {
|
Record that the files for these segments should be
|
||||||
pending = new Vector();
|
deleted, once the pending deletes are committed.
|
||||||
|
*/
|
||||||
|
final void addPendingSegments(Vector segments) throws IOException {
|
||||||
|
for (int i = 0; i < segments.size(); i++) {
|
||||||
|
SegmentReader reader = (SegmentReader)segments.elementAt(i);
|
||||||
|
if (reader.directory() == this.directory) {
|
||||||
|
addPendingFiles(reader.files());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
pending.addElement(fileName);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
final void commitPendingFiles() {
|
/*
|
||||||
|
Record list of files for deletion, but do not delete
|
||||||
|
them until commitPendingFiles is called.
|
||||||
|
*/
|
||||||
|
final void addPendingFiles(Vector files) {
|
||||||
|
for(int i=0;i<files.size();i++) {
|
||||||
|
addPendingFile((String) files.elementAt(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Record a file for deletion, but do not delete it until
|
||||||
|
commitPendingFiles is called.
|
||||||
|
*/
|
||||||
|
final void addPendingFile(String fileName) {
|
||||||
|
if (pending == null) {
|
||||||
|
pending = new HashSet();
|
||||||
|
}
|
||||||
|
pending.add(fileName);
|
||||||
|
}
|
||||||
|
|
||||||
|
final void commitPendingFiles() throws IOException {
|
||||||
if (pending != null) {
|
if (pending != null) {
|
||||||
if (deletable == null) {
|
if (deletable == null) {
|
||||||
deletable = pending;
|
deletable = new Vector();
|
||||||
pending = null;
|
|
||||||
} else {
|
|
||||||
deletable.addAll(pending);
|
|
||||||
pending = null;
|
|
||||||
}
|
}
|
||||||
|
Iterator it = pending.iterator();
|
||||||
|
while(it.hasNext()) {
|
||||||
|
deletable.addElement(it.next());
|
||||||
|
}
|
||||||
|
pending = null;
|
||||||
|
deleteFiles();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -120,6 +120,10 @@ public abstract class IndexReader {
|
||||||
private boolean stale;
|
private boolean stale;
|
||||||
private boolean hasChanges;
|
private boolean hasChanges;
|
||||||
|
|
||||||
|
/** Used by commit() to record pre-commit state in case
|
||||||
|
* rollback is necessary */
|
||||||
|
private boolean rollbackHasChanges;
|
||||||
|
private SegmentInfos rollbackSegmentInfos;
|
||||||
|
|
||||||
/** Returns an IndexReader reading the index in an FSDirectory in the named
|
/** Returns an IndexReader reading the index in an FSDirectory in the named
|
||||||
path. */
|
path. */
|
||||||
|
@ -584,7 +588,43 @@ public abstract class IndexReader {
|
||||||
protected abstract void doUndeleteAll() throws IOException;
|
protected abstract void doUndeleteAll() throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Commit changes resulting from delete, undeleteAll, or setNorm operations
|
* Should internally checkpoint state that will change
|
||||||
|
* during commit so that we can rollback if necessary.
|
||||||
|
*/
|
||||||
|
void startCommit() {
|
||||||
|
if (directoryOwner) {
|
||||||
|
rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
|
||||||
|
}
|
||||||
|
rollbackHasChanges = hasChanges;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rolls back state to just before the commit (this is
|
||||||
|
* called by commit() if there is some exception while
|
||||||
|
* committing).
|
||||||
|
*/
|
||||||
|
void rollbackCommit() {
|
||||||
|
if (directoryOwner) {
|
||||||
|
for(int i=0;i<segmentInfos.size();i++) {
|
||||||
|
// Rollback each segmentInfo. Because the
|
||||||
|
// SegmentReader holds a reference to the
|
||||||
|
// SegmentInfo we can't [easily] just replace
|
||||||
|
// segmentInfos, so we reset it in place instead:
|
||||||
|
segmentInfos.info(i).reset(rollbackSegmentInfos.info(i));
|
||||||
|
}
|
||||||
|
rollbackSegmentInfos = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
hasChanges = rollbackHasChanges;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Commit changes resulting from delete, undeleteAll, or
|
||||||
|
* setNorm operations
|
||||||
|
*
|
||||||
|
* If an exception is hit, then either no changes or all
|
||||||
|
* changes will have been committed to the index
|
||||||
|
* (transactional semantics).
|
||||||
*
|
*
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
|
@ -597,15 +637,53 @@ public abstract class IndexReader {
|
||||||
deleter.deleteFiles();
|
deleter.deleteFiles();
|
||||||
}
|
}
|
||||||
if(directoryOwner){
|
if(directoryOwner){
|
||||||
deleter.clearPendingFiles();
|
|
||||||
doCommit();
|
|
||||||
String oldInfoFileName = segmentInfos.getCurrentSegmentFileName();
|
|
||||||
segmentInfos.write(directory);
|
|
||||||
// Attempt to delete all files we just obsoleted:
|
|
||||||
|
|
||||||
|
// Should not be necessary: no prior commit should
|
||||||
|
// have left pending files, so just defensive:
|
||||||
|
deleter.clearPendingFiles();
|
||||||
|
|
||||||
|
String oldInfoFileName = segmentInfos.getCurrentSegmentFileName();
|
||||||
|
String nextSegmentsFileName = segmentInfos.getNextSegmentFileName();
|
||||||
|
|
||||||
|
// Checkpoint the state we are about to change, in
|
||||||
|
// case we have to roll back:
|
||||||
|
startCommit();
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
doCommit();
|
||||||
|
segmentInfos.write(directory);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
|
||||||
|
if (!success) {
|
||||||
|
|
||||||
|
// Rollback changes that were made to
|
||||||
|
// SegmentInfos but failed to get [fully]
|
||||||
|
// committed. This way this reader instance
|
||||||
|
// remains consistent (matched to what's
|
||||||
|
// actually in the index):
|
||||||
|
rollbackCommit();
|
||||||
|
|
||||||
|
// Erase any pending files that we were going to delete:
|
||||||
|
deleter.clearPendingFiles();
|
||||||
|
|
||||||
|
// Remove possibly partially written next
|
||||||
|
// segments file:
|
||||||
|
deleter.deleteFile(nextSegmentsFileName);
|
||||||
|
|
||||||
|
// Recompute deletable files & remove them (so
|
||||||
|
// partially written .del files, etc, are
|
||||||
|
// removed):
|
||||||
|
deleter.findDeletableFiles();
|
||||||
|
deleter.deleteFiles();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Attempt to delete all files we just obsoleted:
|
||||||
deleter.deleteFile(oldInfoFileName);
|
deleter.deleteFile(oldInfoFileName);
|
||||||
deleter.commitPendingFiles();
|
deleter.commitPendingFiles();
|
||||||
deleter.deleteFiles();
|
|
||||||
if (writeLock != null) {
|
if (writeLock != null) {
|
||||||
writeLock.release(); // release write lock
|
writeLock.release(); // release write lock
|
||||||
writeLock = null;
|
writeLock = null;
|
||||||
|
|
|
@ -31,7 +31,7 @@ import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.Vector;
|
import java.util.Vector;
|
||||||
|
import java.util.HashSet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
An IndexWriter creates and maintains an index.
|
An IndexWriter creates and maintains an index.
|
||||||
|
@ -99,6 +99,11 @@ public class IndexWriter {
|
||||||
|
|
||||||
private Similarity similarity = Similarity.getDefault(); // how to normalize
|
private Similarity similarity = Similarity.getDefault(); // how to normalize
|
||||||
|
|
||||||
|
private boolean inTransaction = false; // true iff we are in a transaction
|
||||||
|
private boolean commitPending; // true if segmentInfos has changes not yet committed
|
||||||
|
private HashSet protectedSegments; // segment names that should not be deleted until commit
|
||||||
|
private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails
|
||||||
|
|
||||||
private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
|
private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
|
||||||
private SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory
|
private SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory
|
||||||
private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs
|
private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs
|
||||||
|
@ -473,6 +478,12 @@ public class IndexWriter {
|
||||||
* Adds a document to this index. If the document contains more than
|
* Adds a document to this index. If the document contains more than
|
||||||
* {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
|
* {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
|
||||||
* discarded.
|
* discarded.
|
||||||
|
*
|
||||||
|
* Note that if an Exception is hit (eg disk full) then
|
||||||
|
* the index will be consistent, but this document will
|
||||||
|
* not have been added. Furthermore, it's possible the
|
||||||
|
* index will have one segment in non-compound format even
|
||||||
|
* when using compound files.
|
||||||
*/
|
*/
|
||||||
public void addDocument(Document doc) throws IOException {
|
public void addDocument(Document doc) throws IOException {
|
||||||
addDocument(doc, analyzer);
|
addDocument(doc, analyzer);
|
||||||
|
@ -483,6 +494,9 @@ public class IndexWriter {
|
||||||
* value of {@link #getAnalyzer()}. If the document contains more than
|
* value of {@link #getAnalyzer()}. If the document contains more than
|
||||||
* {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
|
* {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
|
||||||
* discarded.
|
* discarded.
|
||||||
|
*
|
||||||
|
* See @link #addDocument(Document) for details on index
|
||||||
|
* state after an IOException.
|
||||||
*/
|
*/
|
||||||
public void addDocument(Document doc, Analyzer analyzer) throws IOException {
|
public void addDocument(Document doc, Analyzer analyzer) throws IOException {
|
||||||
DocumentWriter dw =
|
DocumentWriter dw =
|
||||||
|
@ -563,8 +577,22 @@ public class IndexWriter {
|
||||||
*/
|
*/
|
||||||
private PrintStream infoStream = null;
|
private PrintStream infoStream = null;
|
||||||
|
|
||||||
/** Merges all segments together into a single segment, optimizing an index
|
/** Merges all segments together into a single segment,
|
||||||
for search. */
|
* optimizing an index for search..
|
||||||
|
*
|
||||||
|
* <p>Note that this requires temporary free space in the
|
||||||
|
* Directory up to the size of the starting index (exact
|
||||||
|
* usage could be less but will depend on many
|
||||||
|
* factors).</p>
|
||||||
|
|
||||||
|
* <p>If an Exception is hit during optimize() (eg, due to
|
||||||
|
* disk full), the index will not be corrupted. However
|
||||||
|
* it's possible that one of the segments in the index
|
||||||
|
* will be in non-CFS format even when using compound file
|
||||||
|
* format. This will occur when the Exception is hit
|
||||||
|
* during conversion of the segment into compound
|
||||||
|
* format.</p>
|
||||||
|
*/
|
||||||
public synchronized void optimize() throws IOException {
|
public synchronized void optimize() throws IOException {
|
||||||
flushRamSegments();
|
flushRamSegments();
|
||||||
while (segmentInfos.size() > 1 ||
|
while (segmentInfos.size() > 1 ||
|
||||||
|
@ -579,6 +607,85 @@ public class IndexWriter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Begin a transaction. During a transaction, any segment
|
||||||
|
* merges that happen (or ram segments flushed) will not
|
||||||
|
* write a new segments file and will not remove any files
|
||||||
|
* that were present at the start of the transaction. You
|
||||||
|
* must make a matched (try/finall) call to
|
||||||
|
* commitTransaction() or rollbackTransaction() to finish
|
||||||
|
* the transaction.
|
||||||
|
*/
|
||||||
|
private void startTransaction() throws IOException {
|
||||||
|
if (inTransaction) {
|
||||||
|
throw new IOException("transaction is already in process");
|
||||||
|
}
|
||||||
|
rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
|
||||||
|
protectedSegments = new HashSet();
|
||||||
|
for(int i=0;i<segmentInfos.size();i++) {
|
||||||
|
SegmentInfo si = (SegmentInfo) segmentInfos.elementAt(i);
|
||||||
|
protectedSegments.add(si.name);
|
||||||
|
}
|
||||||
|
inTransaction = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Rolls back the transaction and restores state to where
|
||||||
|
* we were at the start.
|
||||||
|
*/
|
||||||
|
private void rollbackTransaction() throws IOException {
|
||||||
|
|
||||||
|
// Keep the same segmentInfos instance but replace all
|
||||||
|
// of its SegmentInfo instances. This is so the next
|
||||||
|
// attempt to commit using this instance of IndexWriter
|
||||||
|
// will always write to a new generation ("write once").
|
||||||
|
segmentInfos.clear();
|
||||||
|
segmentInfos.addAll(rollbackSegmentInfos);
|
||||||
|
|
||||||
|
// Ask deleter to locate unreferenced files & remove
|
||||||
|
// them:
|
||||||
|
deleter.clearPendingFiles();
|
||||||
|
deleter.findDeletableFiles();
|
||||||
|
deleter.deleteFiles();
|
||||||
|
|
||||||
|
clearTransaction();
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Commits the transaction. This will write the new
|
||||||
|
* segments file and remove and pending deletions we have
|
||||||
|
* accumulated during the transaction
|
||||||
|
*/
|
||||||
|
private void commitTransaction() throws IOException {
|
||||||
|
if (commitPending) {
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
// If we hit eg disk full during this write we have
|
||||||
|
// to rollback.:
|
||||||
|
segmentInfos.write(directory); // commit changes
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
rollbackTransaction();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
deleter.commitPendingFiles();
|
||||||
|
commitPending = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
clearTransaction();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Should only be called by rollbackTransaction &
|
||||||
|
* commitTransaction */
|
||||||
|
private void clearTransaction() {
|
||||||
|
protectedSegments = null;
|
||||||
|
rollbackSegmentInfos = null;
|
||||||
|
inTransaction = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** Merges all segments from an array of indexes into this index.
|
/** Merges all segments from an array of indexes into this index.
|
||||||
*
|
*
|
||||||
* <p>This may be used to parallelize batch indexing. A large document
|
* <p>This may be used to parallelize batch indexing. A large document
|
||||||
|
@ -587,27 +694,68 @@ public class IndexWriter {
|
||||||
* complete index can then be created by merging sub-collection indexes
|
* complete index can then be created by merging sub-collection indexes
|
||||||
* with this method.
|
* with this method.
|
||||||
*
|
*
|
||||||
* <p>After this completes, the index is optimized. */
|
* <p>After this completes, the index is optimized.
|
||||||
|
*
|
||||||
|
* <p>This method is transactional in how Exceptions are
|
||||||
|
* handled: it does not commit a new segments_N file until
|
||||||
|
* all indexes are added. This means if an Exception
|
||||||
|
* occurs (eg disk full), then either no indexes will have
|
||||||
|
* been added or they all will have been.</p>
|
||||||
|
*
|
||||||
|
* <p>If an Exception is hit, it's still possible that all
|
||||||
|
* indexes were successfully added. This happens when the
|
||||||
|
* Exception is hit when trying to build a CFS file. In
|
||||||
|
* this case, one segment in the index will be in non-CFS
|
||||||
|
* format, even when using compound file format.</p>
|
||||||
|
*
|
||||||
|
* <p>Also note that on an Exception, the index may still
|
||||||
|
* have been partially or fully optimized even though none
|
||||||
|
* of the input indexes were added. </p>
|
||||||
|
*
|
||||||
|
* <p>Note that this requires temporary free space in the
|
||||||
|
* Directory up to 2X the sum of all input indexes
|
||||||
|
* (including the starting index). Exact usage could be
|
||||||
|
* less but will depend on many factors.</p>
|
||||||
|
*
|
||||||
|
* <p>See <a target="_top"
|
||||||
|
* href="http://issues.apache.org/jira/browse/LUCENE-702">LUCENE-702</a>
|
||||||
|
* for details.</p>
|
||||||
|
*/
|
||||||
public synchronized void addIndexes(Directory[] dirs)
|
public synchronized void addIndexes(Directory[] dirs)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
optimize(); // start with zero or 1 seg
|
optimize(); // start with zero or 1 seg
|
||||||
|
|
||||||
int start = segmentInfos.size();
|
int start = segmentInfos.size();
|
||||||
|
|
||||||
for (int i = 0; i < dirs.length; i++) {
|
boolean success = false;
|
||||||
SegmentInfos sis = new SegmentInfos(); // read infos from dir
|
|
||||||
sis.read(dirs[i]);
|
|
||||||
for (int j = 0; j < sis.size(); j++) {
|
|
||||||
segmentInfos.addElement(sis.info(j)); // add each info
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// merge newly added segments in log(n) passes
|
startTransaction();
|
||||||
while (segmentInfos.size() > start+mergeFactor) {
|
|
||||||
for (int base = start; base < segmentInfos.size(); base++) {
|
try {
|
||||||
int end = Math.min(segmentInfos.size(), base+mergeFactor);
|
for (int i = 0; i < dirs.length; i++) {
|
||||||
if (end-base > 1)
|
SegmentInfos sis = new SegmentInfos(); // read infos from dir
|
||||||
mergeSegments(segmentInfos, base, end);
|
sis.read(dirs[i]);
|
||||||
|
for (int j = 0; j < sis.size(); j++) {
|
||||||
|
segmentInfos.addElement(sis.info(j)); // add each info
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// merge newly added segments in log(n) passes
|
||||||
|
while (segmentInfos.size() > start+mergeFactor) {
|
||||||
|
for (int base = start; base < segmentInfos.size(); base++) {
|
||||||
|
int end = Math.min(segmentInfos.size(), base+mergeFactor);
|
||||||
|
if (end-base > 1) {
|
||||||
|
mergeSegments(segmentInfos, base, end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (success) {
|
||||||
|
commitTransaction();
|
||||||
|
} else {
|
||||||
|
rollbackTransaction();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -623,6 +771,11 @@ public class IndexWriter {
|
||||||
* <p>
|
* <p>
|
||||||
* This requires this index not be among those to be added, and the
|
* This requires this index not be among those to be added, and the
|
||||||
* upper bound* of those segment doc counts not exceed maxMergeDocs.
|
* upper bound* of those segment doc counts not exceed maxMergeDocs.
|
||||||
|
*
|
||||||
|
* <p>See {@link #addIndexes(Directory[])} for
|
||||||
|
* details on transactional semantics, temporary free
|
||||||
|
* space required in the Directory, and non-CFS segments
|
||||||
|
* on an Exception.</p>
|
||||||
*/
|
*/
|
||||||
public synchronized void addIndexesNoOptimize(Directory[] dirs)
|
public synchronized void addIndexesNoOptimize(Directory[] dirs)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -651,96 +804,114 @@ public class IndexWriter {
|
||||||
// and target may use compound file or not. So we use mergeSegments() to
|
// and target may use compound file or not. So we use mergeSegments() to
|
||||||
// copy a segment, which may cause doc count to change because deleted
|
// copy a segment, which may cause doc count to change because deleted
|
||||||
// docs are garbage collected.
|
// docs are garbage collected.
|
||||||
//
|
|
||||||
// In current addIndexes(Directory[]), segment infos in S are added to
|
|
||||||
// T's "segmentInfos" upfront. Then segments in S are merged to T several
|
|
||||||
// at a time. Every merge is committed with T's "segmentInfos". So if
|
|
||||||
// a reader is opened on T while addIndexes() is going on, it could see
|
|
||||||
// an inconsistent index. AddIndexesNoOptimize() has a similar behaviour.
|
|
||||||
|
|
||||||
// 1 flush ram segments
|
// 1 flush ram segments
|
||||||
|
|
||||||
flushRamSegments();
|
flushRamSegments();
|
||||||
|
|
||||||
// 2 copy segment infos and find the highest level from dirs
|
// 2 copy segment infos and find the highest level from dirs
|
||||||
int start = segmentInfos.size();
|
int start = segmentInfos.size();
|
||||||
int startUpperBound = minMergeDocs;
|
int startUpperBound = minMergeDocs;
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
|
||||||
|
startTransaction();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
for (int i = 0; i < dirs.length; i++) {
|
|
||||||
if (directory == dirs[i]) {
|
|
||||||
// cannot add this index: segments may be deleted in merge before added
|
|
||||||
throw new IllegalArgumentException("Cannot add this index to itself");
|
|
||||||
}
|
|
||||||
|
|
||||||
SegmentInfos sis = new SegmentInfos(); // read infos from dir
|
try {
|
||||||
sis.read(dirs[i]);
|
for (int i = 0; i < dirs.length; i++) {
|
||||||
for (int j = 0; j < sis.size(); j++) {
|
if (directory == dirs[i]) {
|
||||||
SegmentInfo info = sis.info(j);
|
// cannot add this index: segments may be deleted in merge before added
|
||||||
segmentInfos.addElement(info); // add each info
|
throw new IllegalArgumentException("Cannot add this index to itself");
|
||||||
|
}
|
||||||
|
|
||||||
while (startUpperBound < info.docCount) {
|
SegmentInfos sis = new SegmentInfos(); // read infos from dir
|
||||||
startUpperBound *= mergeFactor; // find the highest level from dirs
|
sis.read(dirs[i]);
|
||||||
if (startUpperBound > maxMergeDocs) {
|
for (int j = 0; j < sis.size(); j++) {
|
||||||
// upper bound cannot exceed maxMergeDocs
|
SegmentInfo info = sis.info(j);
|
||||||
throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs");
|
segmentInfos.addElement(info); // add each info
|
||||||
|
|
||||||
|
while (startUpperBound < info.docCount) {
|
||||||
|
startUpperBound *= mergeFactor; // find the highest level from dirs
|
||||||
|
if (startUpperBound > maxMergeDocs) {
|
||||||
|
// upper bound cannot exceed maxMergeDocs
|
||||||
|
throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
for (int i = segmentInfos.size() - 1; i >= start; i--) {
|
||||||
|
segmentInfos.remove(i);
|
||||||
|
}
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
} catch (IllegalArgumentException e) {
|
|
||||||
for (int i = segmentInfos.size() - 1; i >= start; i--) {
|
// 3 maybe merge segments starting from the highest level from dirs
|
||||||
segmentInfos.remove(i);
|
maybeMergeSegments(startUpperBound);
|
||||||
|
|
||||||
|
// get the tail segments whose levels <= h
|
||||||
|
int segmentCount = segmentInfos.size();
|
||||||
|
int numTailSegments = 0;
|
||||||
|
while (numTailSegments < segmentCount
|
||||||
|
&& startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) {
|
||||||
|
numTailSegments++;
|
||||||
}
|
}
|
||||||
throw e;
|
if (numTailSegments == 0) {
|
||||||
}
|
success = true;
|
||||||
|
|
||||||
// 3 maybe merge segments starting from the highest level from dirs
|
|
||||||
maybeMergeSegments(startUpperBound);
|
|
||||||
|
|
||||||
// get the tail segments whose levels <= h
|
|
||||||
int segmentCount = segmentInfos.size();
|
|
||||||
int numTailSegments = 0;
|
|
||||||
while (numTailSegments < segmentCount
|
|
||||||
&& startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) {
|
|
||||||
numTailSegments++;
|
|
||||||
}
|
|
||||||
if (numTailSegments == 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 4 make sure invariants hold for the tail segments whose levels <= h
|
|
||||||
if (checkNonDecreasingLevels(segmentCount - numTailSegments)) {
|
|
||||||
// identify the segments from S to be copied (not merged in 3)
|
|
||||||
int numSegmentsToCopy = 0;
|
|
||||||
while (numSegmentsToCopy < segmentCount
|
|
||||||
&& directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) {
|
|
||||||
numSegmentsToCopy++;
|
|
||||||
}
|
|
||||||
if (numSegmentsToCopy == 0) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// copy those segments from S
|
// 4 make sure invariants hold for the tail segments whose levels <= h
|
||||||
for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) {
|
if (checkNonDecreasingLevels(segmentCount - numTailSegments)) {
|
||||||
mergeSegments(segmentInfos, i, i + 1);
|
// identify the segments from S to be copied (not merged in 3)
|
||||||
}
|
int numSegmentsToCopy = 0;
|
||||||
if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) {
|
while (numSegmentsToCopy < segmentCount
|
||||||
return;
|
&& directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) {
|
||||||
}
|
numSegmentsToCopy++;
|
||||||
}
|
}
|
||||||
|
if (numSegmentsToCopy == 0) {
|
||||||
|
success = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// invariants do not hold, simply merge those segments
|
// copy those segments from S
|
||||||
mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount);
|
for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) {
|
||||||
|
mergeSegments(segmentInfos, i, i + 1);
|
||||||
|
}
|
||||||
|
if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) {
|
||||||
|
success = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// maybe merge segments again if necessary
|
// invariants do not hold, simply merge those segments
|
||||||
if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) {
|
mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount);
|
||||||
maybeMergeSegments(startUpperBound * mergeFactor);
|
|
||||||
|
// maybe merge segments again if necessary
|
||||||
|
if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) {
|
||||||
|
maybeMergeSegments(startUpperBound * mergeFactor);
|
||||||
|
}
|
||||||
|
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (success) {
|
||||||
|
commitTransaction();
|
||||||
|
} else {
|
||||||
|
rollbackTransaction();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Merges the provided indexes into this index.
|
/** Merges the provided indexes into this index.
|
||||||
* <p>After this completes, the index is optimized. </p>
|
* <p>After this completes, the index is optimized. </p>
|
||||||
* <p>The provided IndexReaders are not closed.</p>
|
* <p>The provided IndexReaders are not closed.</p>
|
||||||
|
|
||||||
|
* <p>See {@link #addIndexes(Directory[])} for
|
||||||
|
* details on transactional semantics, temporary free
|
||||||
|
* space required in the Directory, and non-CFS segments
|
||||||
|
* on an Exception.</p>
|
||||||
*/
|
*/
|
||||||
public synchronized void addIndexes(IndexReader[] readers)
|
public synchronized void addIndexes(IndexReader[] readers)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -761,26 +932,61 @@ public class IndexWriter {
|
||||||
for (int i = 0; i < readers.length; i++) // add new indexes
|
for (int i = 0; i < readers.length; i++) // add new indexes
|
||||||
merger.add(readers[i]);
|
merger.add(readers[i]);
|
||||||
|
|
||||||
int docCount = merger.merge(); // merge 'em
|
SegmentInfo info;
|
||||||
|
|
||||||
segmentInfos.setSize(0); // pop old infos & add new
|
|
||||||
SegmentInfo info = new SegmentInfo(mergedName, docCount, directory, false);
|
|
||||||
segmentInfos.addElement(info);
|
|
||||||
|
|
||||||
if(sReader != null)
|
|
||||||
sReader.close();
|
|
||||||
|
|
||||||
String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
|
String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
|
||||||
segmentInfos.write(directory); // commit changes
|
|
||||||
|
boolean success = false;
|
||||||
|
|
||||||
|
startTransaction();
|
||||||
|
|
||||||
|
try {
|
||||||
|
int docCount = merger.merge(); // merge 'em
|
||||||
|
|
||||||
|
segmentInfos.setSize(0); // pop old infos & add new
|
||||||
|
info = new SegmentInfo(mergedName, docCount, directory, false);
|
||||||
|
segmentInfos.addElement(info);
|
||||||
|
commitPending = true;
|
||||||
|
|
||||||
|
if(sReader != null)
|
||||||
|
sReader.close();
|
||||||
|
|
||||||
|
success = true;
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
rollbackTransaction();
|
||||||
|
} else {
|
||||||
|
commitTransaction();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
|
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
|
||||||
deleter.deleteSegments(segmentsToDelete); // delete now-unused segments
|
deleter.deleteSegments(segmentsToDelete); // delete now-unused segments
|
||||||
|
|
||||||
if (useCompoundFile) {
|
if (useCompoundFile) {
|
||||||
Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs");
|
success = false;
|
||||||
|
|
||||||
segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
|
segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
|
||||||
info.setUseCompoundFile(true);
|
Vector filesToDelete;
|
||||||
segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file
|
|
||||||
|
startTransaction();
|
||||||
|
|
||||||
|
try {
|
||||||
|
|
||||||
|
filesToDelete = merger.createCompoundFile(mergedName + ".cfs");
|
||||||
|
|
||||||
|
info.setUseCompoundFile(true);
|
||||||
|
commitPending = true;
|
||||||
|
success = true;
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
rollbackTransaction();
|
||||||
|
} else {
|
||||||
|
commitTransaction();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
|
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
|
||||||
deleter.deleteFiles(filesToDelete); // delete now unused files of segment
|
deleter.deleteFiles(filesToDelete); // delete now unused files of segment
|
||||||
|
@ -884,6 +1090,7 @@ public class IndexWriter {
|
||||||
// mergeFactor and/or maxBufferedDocs change(s)
|
// mergeFactor and/or maxBufferedDocs change(s)
|
||||||
while (numSegments >= mergeFactor) {
|
while (numSegments >= mergeFactor) {
|
||||||
// merge the leftmost* mergeFactor segments
|
// merge the leftmost* mergeFactor segments
|
||||||
|
|
||||||
int docCount = mergeSegments(segmentInfos, minSegment, minSegment + mergeFactor);
|
int docCount = mergeSegments(segmentInfos, minSegment, minSegment + mergeFactor);
|
||||||
numSegments -= mergeFactor;
|
numSegments -= mergeFactor;
|
||||||
|
|
||||||
|
@ -921,51 +1128,154 @@ public class IndexWriter {
|
||||||
SegmentMerger merger = new SegmentMerger(this, mergedName);
|
SegmentMerger merger = new SegmentMerger(this, mergedName);
|
||||||
|
|
||||||
final Vector segmentsToDelete = new Vector();
|
final Vector segmentsToDelete = new Vector();
|
||||||
for (int i = minSegment; i < end; i++) {
|
|
||||||
SegmentInfo si = sourceSegments.info(i);
|
|
||||||
if (infoStream != null)
|
|
||||||
infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
|
|
||||||
IndexReader reader = SegmentReader.get(si);
|
|
||||||
merger.add(reader);
|
|
||||||
if ((reader.directory() == this.directory) || // if we own the directory
|
|
||||||
(reader.directory() == this.ramDirectory))
|
|
||||||
segmentsToDelete.addElement(reader); // queue segment for deletion
|
|
||||||
}
|
|
||||||
|
|
||||||
int mergedDocCount = merger.merge();
|
|
||||||
|
|
||||||
if (infoStream != null) {
|
|
||||||
infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
|
|
||||||
}
|
|
||||||
|
|
||||||
SegmentInfo newSegment = new SegmentInfo(mergedName, mergedDocCount,
|
|
||||||
directory, false);
|
|
||||||
if (sourceSegments == ramSegmentInfos) {
|
|
||||||
sourceSegments.removeAllElements();
|
|
||||||
segmentInfos.addElement(newSegment);
|
|
||||||
} else {
|
|
||||||
for (int i = end-1; i > minSegment; i--) // remove old infos & add new
|
|
||||||
sourceSegments.remove(i);
|
|
||||||
segmentInfos.set(minSegment, newSegment);
|
|
||||||
}
|
|
||||||
|
|
||||||
// close readers before we attempt to delete now-obsolete segments
|
|
||||||
merger.closeReaders();
|
|
||||||
|
|
||||||
String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
|
String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
|
||||||
segmentInfos.write(directory); // commit before deleting
|
String nextSegmentsFileName = segmentInfos.getNextSegmentFileName();
|
||||||
|
|
||||||
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
|
SegmentInfo newSegment = null;
|
||||||
deleter.deleteSegments(segmentsToDelete); // delete now-unused segments
|
|
||||||
|
int mergedDocCount;
|
||||||
|
|
||||||
|
// This is try/finally to make sure merger's readers are closed:
|
||||||
|
try {
|
||||||
|
|
||||||
|
for (int i = minSegment; i < end; i++) {
|
||||||
|
SegmentInfo si = sourceSegments.info(i);
|
||||||
|
if (infoStream != null)
|
||||||
|
infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
|
||||||
|
IndexReader reader = SegmentReader.get(si);
|
||||||
|
merger.add(reader);
|
||||||
|
if ((reader.directory() == this.directory) || // if we own the directory
|
||||||
|
(reader.directory() == this.ramDirectory))
|
||||||
|
segmentsToDelete.addElement(reader); // queue segment for deletion
|
||||||
|
}
|
||||||
|
|
||||||
|
SegmentInfos rollback = null;
|
||||||
|
boolean success = false;
|
||||||
|
|
||||||
|
// This is try/finally to rollback our internal state
|
||||||
|
// if we hit exception when doing the merge:
|
||||||
|
try {
|
||||||
|
|
||||||
|
mergedDocCount = merger.merge();
|
||||||
|
|
||||||
|
if (infoStream != null) {
|
||||||
|
infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
|
||||||
|
}
|
||||||
|
|
||||||
|
newSegment = new SegmentInfo(mergedName, mergedDocCount,
|
||||||
|
directory, false);
|
||||||
|
|
||||||
|
|
||||||
|
if (sourceSegments == ramSegmentInfos) {
|
||||||
|
segmentInfos.addElement(newSegment);
|
||||||
|
} else {
|
||||||
|
|
||||||
|
if (!inTransaction) {
|
||||||
|
// Now save the SegmentInfo instances that
|
||||||
|
// we are replacing:
|
||||||
|
rollback = (SegmentInfos) segmentInfos.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = end-1; i > minSegment; i--) // remove old infos & add new
|
||||||
|
sourceSegments.remove(i);
|
||||||
|
|
||||||
|
segmentInfos.set(minSegment, newSegment);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!inTransaction) {
|
||||||
|
segmentInfos.write(directory); // commit before deleting
|
||||||
|
} else {
|
||||||
|
commitPending = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
success = true;
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
|
||||||
|
if (success) {
|
||||||
|
// The non-ram-segments case is already committed
|
||||||
|
// (above), so all the remains for ram segments case
|
||||||
|
// is to clear the ram segments:
|
||||||
|
if (sourceSegments == ramSegmentInfos) {
|
||||||
|
ramSegmentInfos.removeAllElements();
|
||||||
|
}
|
||||||
|
} else if (!inTransaction) {
|
||||||
|
|
||||||
|
// Must rollback so our state matches index:
|
||||||
|
|
||||||
|
if (sourceSegments == ramSegmentInfos) {
|
||||||
|
// Simple case: newSegment may or may not have
|
||||||
|
// been added to the end of our segment infos,
|
||||||
|
// so just check & remove if so:
|
||||||
|
if (newSegment != null &&
|
||||||
|
segmentInfos.size() > 0 &&
|
||||||
|
segmentInfos.info(segmentInfos.size()-1) == newSegment) {
|
||||||
|
segmentInfos.remove(segmentInfos.size()-1);
|
||||||
|
}
|
||||||
|
} else if (rollback != null) {
|
||||||
|
// Rollback the individual SegmentInfo
|
||||||
|
// instances, but keep original SegmentInfos
|
||||||
|
// instance (so we don't try to write again the
|
||||||
|
// same segments_N file -- write once):
|
||||||
|
segmentInfos.clear();
|
||||||
|
segmentInfos.addAll(rollback);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete any partially created files:
|
||||||
|
deleter.deleteFile(nextSegmentsFileName);
|
||||||
|
deleter.findDeletableFiles();
|
||||||
|
deleter.deleteFiles();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
// close readers before we attempt to delete now-obsolete segments
|
||||||
|
merger.closeReaders();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!inTransaction) {
|
||||||
|
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
|
||||||
|
deleter.deleteSegments(segmentsToDelete); // delete now-unused segments
|
||||||
|
} else {
|
||||||
|
deleter.addPendingFile(segmentsInfosFileName); // delete old segments_N file
|
||||||
|
deleter.deleteSegments(segmentsToDelete, protectedSegments); // delete now-unused segments
|
||||||
|
}
|
||||||
|
|
||||||
if (useCompoundFile) {
|
if (useCompoundFile) {
|
||||||
Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs");
|
|
||||||
|
|
||||||
segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
|
segmentsInfosFileName = nextSegmentsFileName;
|
||||||
newSegment.setUseCompoundFile(true);
|
nextSegmentsFileName = segmentInfos.getNextSegmentFileName();
|
||||||
segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file
|
|
||||||
|
|
||||||
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
|
Vector filesToDelete;
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
|
||||||
|
try {
|
||||||
|
|
||||||
|
filesToDelete = merger.createCompoundFile(mergedName + ".cfs");
|
||||||
|
newSegment.setUseCompoundFile(true);
|
||||||
|
if (!inTransaction) {
|
||||||
|
segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file
|
||||||
|
}
|
||||||
|
success = true;
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
if (!success && !inTransaction) {
|
||||||
|
// Must rollback:
|
||||||
|
newSegment.setUseCompoundFile(false);
|
||||||
|
deleter.deleteFile(mergedName + ".cfs");
|
||||||
|
deleter.deleteFile(nextSegmentsFileName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!inTransaction) {
|
||||||
|
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
|
||||||
|
}
|
||||||
|
|
||||||
|
// We can delete these segments whether or not we are
|
||||||
|
// in a transaction because we had just written them
|
||||||
|
// above so they can't need protection by the
|
||||||
|
// transaction:
|
||||||
deleter.deleteFiles(filesToDelete); // delete now-unused segments
|
deleter.deleteFiles(filesToDelete); // delete now-unused segments
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -230,6 +230,20 @@ public class MultiReader extends IndexReader {
|
||||||
subReaders[i].commit();
|
subReaders[i].commit();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void startCommit() {
|
||||||
|
super.startCommit();
|
||||||
|
for (int i = 0; i < subReaders.length; i++) {
|
||||||
|
subReaders[i].startCommit();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void rollbackCommit() {
|
||||||
|
super.rollbackCommit();
|
||||||
|
for (int i = 0; i < subReaders.length; i++) {
|
||||||
|
subReaders[i].rollbackCommit();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected synchronized void doClose() throws IOException {
|
protected synchronized void doClose() throws IOException {
|
||||||
for (int i = 0; i < subReaders.length; i++)
|
for (int i = 0; i < subReaders.length; i++)
|
||||||
subReaders[i].close();
|
subReaders[i].close();
|
||||||
|
|
|
@ -62,6 +62,23 @@ final class SegmentInfo {
|
||||||
preLockless = false;
|
preLockless = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy everything from src SegmentInfo into our instance.
|
||||||
|
*/
|
||||||
|
void reset(SegmentInfo src) {
|
||||||
|
name = src.name;
|
||||||
|
docCount = src.docCount;
|
||||||
|
dir = src.dir;
|
||||||
|
preLockless = src.preLockless;
|
||||||
|
delGen = src.delGen;
|
||||||
|
if (src.normGen == null) {
|
||||||
|
normGen = null;
|
||||||
|
} else {
|
||||||
|
normGen = new long[src.normGen.length];
|
||||||
|
System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length);
|
||||||
|
}
|
||||||
|
isCompoundFile = src.isCompoundFile;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a new SegmentInfo instance by reading a
|
* Construct a new SegmentInfo instance by reading a
|
||||||
|
@ -151,6 +168,17 @@ final class SegmentInfo {
|
||||||
delGen = -1;
|
delGen = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Object clone () {
|
||||||
|
SegmentInfo si = new SegmentInfo(name, docCount, dir);
|
||||||
|
si.isCompoundFile = isCompoundFile;
|
||||||
|
si.delGen = delGen;
|
||||||
|
si.preLockless = preLockless;
|
||||||
|
if (normGen != null) {
|
||||||
|
si.normGen = (long[]) normGen.clone();
|
||||||
|
}
|
||||||
|
return si;
|
||||||
|
}
|
||||||
|
|
||||||
String getDelFileName() {
|
String getDelFileName() {
|
||||||
if (delGen == -1) {
|
if (delGen == -1) {
|
||||||
// In this case we know there is no deletion filename
|
// In this case we know there is no deletion filename
|
||||||
|
|
|
@ -50,7 +50,11 @@ public final class SegmentInfos extends Vector {
|
||||||
* starting with the current time in milliseconds forces to create unique version numbers.
|
* starting with the current time in milliseconds forces to create unique version numbers.
|
||||||
*/
|
*/
|
||||||
private long version = System.currentTimeMillis();
|
private long version = System.currentTimeMillis();
|
||||||
private long generation = 0; // generation of the "segments_N" file we read
|
|
||||||
|
private long generation = 0; // generation of the "segments_N" for the next commit
|
||||||
|
private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read
|
||||||
|
// or wrote; this is normally the same as generation except if
|
||||||
|
// there was an IOException that had interrupted a commit
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If non-null, information about loading segments_N files
|
* If non-null, information about loading segments_N files
|
||||||
|
@ -132,12 +136,28 @@ public final class SegmentInfos extends Vector {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the segment_N filename in use by this segment infos.
|
* Get the segments_N filename in use by this segment infos.
|
||||||
*/
|
*/
|
||||||
public String getCurrentSegmentFileName() {
|
public String getCurrentSegmentFileName() {
|
||||||
return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
|
return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
|
||||||
"",
|
"",
|
||||||
generation);
|
lastGeneration);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the next segments_N filename that will be written.
|
||||||
|
*/
|
||||||
|
public String getNextSegmentFileName() {
|
||||||
|
long nextGeneration;
|
||||||
|
|
||||||
|
if (generation == -1) {
|
||||||
|
nextGeneration = 1;
|
||||||
|
} else {
|
||||||
|
nextGeneration = generation+1;
|
||||||
|
}
|
||||||
|
return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
|
||||||
|
"",
|
||||||
|
nextGeneration);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -158,6 +178,7 @@ public final class SegmentInfos extends Vector {
|
||||||
generation = Long.parseLong(segmentFileName.substring(1+IndexFileNames.SEGMENTS.length()),
|
generation = Long.parseLong(segmentFileName.substring(1+IndexFileNames.SEGMENTS.length()),
|
||||||
Character.MAX_RADIX);
|
Character.MAX_RADIX);
|
||||||
}
|
}
|
||||||
|
lastGeneration = generation;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
int format = input.readInt();
|
int format = input.readInt();
|
||||||
|
@ -199,7 +220,7 @@ public final class SegmentInfos extends Vector {
|
||||||
*/
|
*/
|
||||||
public final void read(Directory directory) throws IOException {
|
public final void read(Directory directory) throws IOException {
|
||||||
|
|
||||||
generation = -1;
|
generation = lastGeneration = -1;
|
||||||
|
|
||||||
new FindSegmentsFile(directory) {
|
new FindSegmentsFile(directory) {
|
||||||
|
|
||||||
|
@ -212,6 +233,8 @@ public final class SegmentInfos extends Vector {
|
||||||
|
|
||||||
public final void write(Directory directory) throws IOException {
|
public final void write(Directory directory) throws IOException {
|
||||||
|
|
||||||
|
String segmentFileName = getNextSegmentFileName();
|
||||||
|
|
||||||
// Always advance the generation on write:
|
// Always advance the generation on write:
|
||||||
if (generation == -1) {
|
if (generation == -1) {
|
||||||
generation = 1;
|
generation = 1;
|
||||||
|
@ -219,7 +242,6 @@ public final class SegmentInfos extends Vector {
|
||||||
generation++;
|
generation++;
|
||||||
}
|
}
|
||||||
|
|
||||||
String segmentFileName = getCurrentSegmentFileName();
|
|
||||||
IndexOutput output = directory.createOutput(segmentFileName);
|
IndexOutput output = directory.createOutput(segmentFileName);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -229,8 +251,7 @@ public final class SegmentInfos extends Vector {
|
||||||
output.writeInt(counter); // write counter
|
output.writeInt(counter); // write counter
|
||||||
output.writeInt(size()); // write infos
|
output.writeInt(size()); // write infos
|
||||||
for (int i = 0; i < size(); i++) {
|
for (int i = 0; i < size(); i++) {
|
||||||
SegmentInfo si = info(i);
|
info(i).write(output);
|
||||||
si.write(output);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
finally {
|
finally {
|
||||||
|
@ -247,6 +268,21 @@ public final class SegmentInfos extends Vector {
|
||||||
// It's OK if we fail to write this file since it's
|
// It's OK if we fail to write this file since it's
|
||||||
// used only as one of the retry fallbacks.
|
// used only as one of the retry fallbacks.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lastGeneration = generation;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a copy of this instance, also copying each
|
||||||
|
* SegmentInfo.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public Object clone() {
|
||||||
|
SegmentInfos sis = (SegmentInfos) super.clone();
|
||||||
|
for(int i=0;i<sis.size();i++) {
|
||||||
|
sis.setElementAt(((SegmentInfo) sis.elementAt(i)).clone(), i);
|
||||||
|
}
|
||||||
|
return sis;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -496,7 +532,7 @@ public final class SegmentInfos extends Vector {
|
||||||
if (genLookaheadCount < defaultGenLookaheadCount) {
|
if (genLookaheadCount < defaultGenLookaheadCount) {
|
||||||
gen++;
|
gen++;
|
||||||
genLookaheadCount++;
|
genLookaheadCount++;
|
||||||
message("look ahead incremenent gen to " + gen);
|
message("look ahead increment gen to " + gen);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -47,6 +47,10 @@ class SegmentReader extends IndexReader {
|
||||||
private boolean normsDirty = false;
|
private boolean normsDirty = false;
|
||||||
private boolean undeleteAll = false;
|
private boolean undeleteAll = false;
|
||||||
|
|
||||||
|
private boolean rollbackDeletedDocsDirty = false;
|
||||||
|
private boolean rollbackNormsDirty = false;
|
||||||
|
private boolean rollbackUndeleteAll = false;
|
||||||
|
|
||||||
IndexInput freqStream;
|
IndexInput freqStream;
|
||||||
IndexInput proxStream;
|
IndexInput proxStream;
|
||||||
|
|
||||||
|
@ -64,6 +68,7 @@ class SegmentReader extends IndexReader {
|
||||||
private byte[] bytes;
|
private byte[] bytes;
|
||||||
private boolean dirty;
|
private boolean dirty;
|
||||||
private int number;
|
private int number;
|
||||||
|
private boolean rollbackDirty;
|
||||||
|
|
||||||
private void reWrite(SegmentInfo si) throws IOException {
|
private void reWrite(SegmentInfo si) throws IOException {
|
||||||
// NOTE: norms are re-written in regular directory, not cfs
|
// NOTE: norms are re-written in regular directory, not cfs
|
||||||
|
@ -545,4 +550,39 @@ class SegmentReader extends IndexReader {
|
||||||
|
|
||||||
return termVectorsReader.get(docNumber);
|
return termVectorsReader.get(docNumber);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the name of the segment this reader is reading.
|
||||||
|
*/
|
||||||
|
String getSegmentName() {
|
||||||
|
return segment;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setSegmentInfo(SegmentInfo info) {
|
||||||
|
si = info;
|
||||||
|
}
|
||||||
|
|
||||||
|
void startCommit() {
|
||||||
|
super.startCommit();
|
||||||
|
rollbackDeletedDocsDirty = deletedDocsDirty;
|
||||||
|
rollbackNormsDirty = normsDirty;
|
||||||
|
rollbackUndeleteAll = undeleteAll;
|
||||||
|
Enumeration values = norms.elements();
|
||||||
|
while (values.hasMoreElements()) {
|
||||||
|
Norm norm = (Norm) values.nextElement();
|
||||||
|
norm.rollbackDirty = norm.dirty;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void rollbackCommit() {
|
||||||
|
super.rollbackCommit();
|
||||||
|
deletedDocsDirty = rollbackDeletedDocsDirty;
|
||||||
|
normsDirty = rollbackNormsDirty;
|
||||||
|
undeleteAll = rollbackUndeleteAll;
|
||||||
|
Enumeration values = norms.elements();
|
||||||
|
while (values.hasMoreElements()) {
|
||||||
|
Norm norm = (Norm) values.nextElement();
|
||||||
|
norm.dirty = norm.rollbackDirty;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,13 +34,13 @@ import java.util.Set;
|
||||||
*
|
*
|
||||||
* @version $Id$
|
* @version $Id$
|
||||||
*/
|
*/
|
||||||
public final class RAMDirectory extends Directory implements Serializable {
|
public class RAMDirectory extends Directory implements Serializable {
|
||||||
|
|
||||||
private static final long serialVersionUID = 1l;
|
private static final long serialVersionUID = 1l;
|
||||||
|
|
||||||
private HashMap fileMap = new HashMap();
|
HashMap fileMap = new HashMap();
|
||||||
private Set fileNames = fileMap.keySet();
|
private Set fileNames = fileMap.keySet();
|
||||||
private Collection files = fileMap.values();
|
Collection files = fileMap.values();
|
||||||
long sizeInBytes = 0;
|
long sizeInBytes = 0;
|
||||||
|
|
||||||
// *****
|
// *****
|
||||||
|
@ -178,20 +178,13 @@ public final class RAMDirectory extends Directory implements Serializable {
|
||||||
return file.getLength();
|
return file.getLength();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Return total size in bytes of all files in this directory */
|
/** Return total size in bytes of all files in this
|
||||||
|
* directory. This is currently quantized to
|
||||||
|
* BufferedIndexOutput.BUFFER_SIZE. */
|
||||||
public synchronized final long sizeInBytes() {
|
public synchronized final long sizeInBytes() {
|
||||||
return sizeInBytes;
|
return sizeInBytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Provided for testing purposes. Use sizeInBytes() instead. */
|
|
||||||
public synchronized final long getRecomputedSizeInBytes() {
|
|
||||||
long size = 0;
|
|
||||||
Iterator it = files.iterator();
|
|
||||||
while (it.hasNext())
|
|
||||||
size += ((RAMFile) it.next()).getSizeInBytes();
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Removes an existing file in the directory.
|
/** Removes an existing file in the directory.
|
||||||
* @throws IOException if the file does not exist
|
* @throws IOException if the file does not exist
|
||||||
*/
|
*/
|
||||||
|
@ -222,7 +215,7 @@ public final class RAMDirectory extends Directory implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Creates a new, empty file in the directory with the given name. Returns a stream writing this file. */
|
/** Creates a new, empty file in the directory with the given name. Returns a stream writing this file. */
|
||||||
public final IndexOutput createOutput(String name) {
|
public IndexOutput createOutput(String name) {
|
||||||
RAMFile file = new RAMFile(this);
|
RAMFile file = new RAMFile(this);
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
RAMFile existing = (RAMFile)fileMap.get(name);
|
RAMFile existing = (RAMFile)fileMap.get(name);
|
||||||
|
|
|
@ -66,7 +66,7 @@ public class RAMOutputStream extends BufferedIndexOutput {
|
||||||
file.setLength(0);
|
file.setLength(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void flushBuffer(byte[] src, int len) {
|
public void flushBuffer(byte[] src, int len) throws IOException {
|
||||||
byte[] buffer;
|
byte[] buffer;
|
||||||
int bufferPos = 0;
|
int bufferPos = 0;
|
||||||
while (bufferPos != len) {
|
while (bufferPos != len) {
|
||||||
|
|
|
@ -34,7 +34,12 @@ public class TestIndexFileDeleter extends TestCase
|
||||||
Directory dir = new RAMDirectory();
|
Directory dir = new RAMDirectory();
|
||||||
|
|
||||||
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
|
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
|
||||||
for(int i=0;i<35;i++) {
|
int i;
|
||||||
|
for(i=0;i<35;i++) {
|
||||||
|
addDoc(writer, i);
|
||||||
|
}
|
||||||
|
writer.setUseCompoundFile(false);
|
||||||
|
for(;i<45;i++) {
|
||||||
addDoc(writer, i);
|
addDoc(writer, i);
|
||||||
}
|
}
|
||||||
writer.close();
|
writer.close();
|
||||||
|
@ -68,7 +73,7 @@ public class TestIndexFileDeleter extends TestCase
|
||||||
CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs");
|
CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs");
|
||||||
FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
|
FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
|
||||||
int contentFieldIndex = -1;
|
int contentFieldIndex = -1;
|
||||||
for(int i=0;i<fieldInfos.size();i++) {
|
for(i=0;i<fieldInfos.size();i++) {
|
||||||
FieldInfo fi = fieldInfos.fieldInfo(i);
|
FieldInfo fi = fieldInfos.fieldInfo(i);
|
||||||
if (fi.name.equals("content")) {
|
if (fi.name.equals("content")) {
|
||||||
contentFieldIndex = i;
|
contentFieldIndex = i;
|
||||||
|
@ -125,6 +130,9 @@ public class TestIndexFileDeleter extends TestCase
|
||||||
copyFile(dir, "segments_a", "segments");
|
copyFile(dir, "segments_a", "segments");
|
||||||
copyFile(dir, "segments_a", "segments_2");
|
copyFile(dir, "segments_a", "segments_2");
|
||||||
|
|
||||||
|
// Create a bogus cfs file shadowing a non-cfs segment:
|
||||||
|
copyFile(dir, "_2.cfs", "_3.cfs");
|
||||||
|
|
||||||
String[] filesPre = dir.list();
|
String[] filesPre = dir.list();
|
||||||
|
|
||||||
// Open & close a writer: it should delete the above 4
|
// Open & close a writer: it should delete the above 4
|
||||||
|
|
|
@ -30,11 +30,18 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Hits;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.MockRAMDirectory;
|
||||||
|
|
||||||
public class TestIndexReader extends TestCase
|
public class TestIndexReader extends TestCase
|
||||||
{
|
{
|
||||||
/** Main for running test case by itself. */
|
/** Main for running test case by itself. */
|
||||||
|
@ -548,6 +555,212 @@ public class TestIndexReader extends TestCase
|
||||||
deleteReaderReaderConflict(true);
|
deleteReaderReaderConflict(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Make sure if reader tries to commit but hits disk
|
||||||
|
* full that reader remains consistent and usable.
|
||||||
|
*/
|
||||||
|
public void testDiskFull() throws IOException {
|
||||||
|
|
||||||
|
boolean debug = false;
|
||||||
|
Term searchTerm = new Term("content", "aaa");
|
||||||
|
int START_COUNT = 157;
|
||||||
|
int END_COUNT = 144;
|
||||||
|
|
||||||
|
// First build up a starting index:
|
||||||
|
RAMDirectory startDir = new RAMDirectory();
|
||||||
|
IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), true);
|
||||||
|
for(int i=0;i<157;i++) {
|
||||||
|
Document d = new Document();
|
||||||
|
d.add(new Field("id", Integer.toString(i), Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||||
|
d.add(new Field("content", "aaa " + i, Field.Store.NO, Field.Index.TOKENIZED));
|
||||||
|
writer.addDocument(d);
|
||||||
|
}
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
long diskUsage = startDir.sizeInBytes();
|
||||||
|
long diskFree = diskUsage+100;
|
||||||
|
|
||||||
|
IOException err = null;
|
||||||
|
|
||||||
|
boolean done = false;
|
||||||
|
|
||||||
|
// Iterate w/ ever increasing free disk space:
|
||||||
|
while(!done) {
|
||||||
|
MockRAMDirectory dir = new MockRAMDirectory(startDir);
|
||||||
|
IndexReader reader = IndexReader.open(dir);
|
||||||
|
|
||||||
|
// For each disk size, first try to commit against
|
||||||
|
// dir that will hit random IOExceptions & disk
|
||||||
|
// full; after, give it infinite disk space & turn
|
||||||
|
// off random IOExceptions & retry w/ same reader:
|
||||||
|
boolean success = false;
|
||||||
|
|
||||||
|
for(int x=0;x<2;x++) {
|
||||||
|
|
||||||
|
double rate = 0.05;
|
||||||
|
double diskRatio = ((double) diskFree)/diskUsage;
|
||||||
|
long thisDiskFree;
|
||||||
|
String testName;
|
||||||
|
|
||||||
|
if (0 == x) {
|
||||||
|
thisDiskFree = diskFree;
|
||||||
|
if (diskRatio >= 2.0) {
|
||||||
|
rate /= 2;
|
||||||
|
}
|
||||||
|
if (diskRatio >= 4.0) {
|
||||||
|
rate /= 2;
|
||||||
|
}
|
||||||
|
if (diskRatio >= 6.0) {
|
||||||
|
rate = 0.0;
|
||||||
|
}
|
||||||
|
if (debug) {
|
||||||
|
System.out.println("\ncycle: " + diskFree + " bytes");
|
||||||
|
}
|
||||||
|
testName = "disk full during reader.close() @ " + thisDiskFree + " bytes";
|
||||||
|
} else {
|
||||||
|
thisDiskFree = 0;
|
||||||
|
rate = 0.0;
|
||||||
|
if (debug) {
|
||||||
|
System.out.println("\ncycle: same writer: unlimited disk space");
|
||||||
|
}
|
||||||
|
testName = "reader re-use after disk full";
|
||||||
|
}
|
||||||
|
|
||||||
|
dir.setMaxSizeInBytes(thisDiskFree);
|
||||||
|
dir.setRandomIOExceptionRate(rate, diskFree);
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (0 == x) {
|
||||||
|
int docId = 12;
|
||||||
|
for(int i=0;i<13;i++) {
|
||||||
|
reader.deleteDocument(docId);
|
||||||
|
reader.setNorm(docId, "contents", (float) 2.0);
|
||||||
|
docId += 12;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
reader.close();
|
||||||
|
success = true;
|
||||||
|
if (0 == x) {
|
||||||
|
done = true;
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
if (debug) {
|
||||||
|
System.out.println(" hit IOException: " + e);
|
||||||
|
}
|
||||||
|
err = e;
|
||||||
|
if (1 == x) {
|
||||||
|
e.printStackTrace();
|
||||||
|
fail(testName + " hit IOException after disk space was freed up");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Whether we succeeded or failed, check that all
|
||||||
|
// un-referenced files were in fact deleted (ie,
|
||||||
|
// we did not create garbage). Just create a
|
||||||
|
// new IndexFileDeleter, have it delete
|
||||||
|
// unreferenced files, then verify that in fact
|
||||||
|
// no files were deleted:
|
||||||
|
String[] startFiles = dir.list();
|
||||||
|
SegmentInfos infos = new SegmentInfos();
|
||||||
|
infos.read(dir);
|
||||||
|
IndexFileDeleter d = new IndexFileDeleter(infos, dir);
|
||||||
|
d.findDeletableFiles();
|
||||||
|
d.deleteFiles();
|
||||||
|
String[] endFiles = dir.list();
|
||||||
|
|
||||||
|
Arrays.sort(startFiles);
|
||||||
|
Arrays.sort(endFiles);
|
||||||
|
|
||||||
|
//for(int i=0;i<startFiles.length;i++) {
|
||||||
|
// System.out.println(" startFiles: " + i + ": " + startFiles[i]);
|
||||||
|
//}
|
||||||
|
|
||||||
|
if (!Arrays.equals(startFiles, endFiles)) {
|
||||||
|
String successStr;
|
||||||
|
if (success) {
|
||||||
|
successStr = "success";
|
||||||
|
} else {
|
||||||
|
successStr = "IOException";
|
||||||
|
err.printStackTrace();
|
||||||
|
}
|
||||||
|
fail("reader.close() failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes): before delete:\n " + arrayToString(startFiles) + "\n after delete:\n " + arrayToString(endFiles));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Finally, verify index is not corrupt, and, if
|
||||||
|
// we succeeded, we see all docs changed, and if
|
||||||
|
// we failed, we see either all docs or no docs
|
||||||
|
// changed (transactional semantics):
|
||||||
|
IndexReader newReader = null;
|
||||||
|
try {
|
||||||
|
newReader = IndexReader.open(dir);
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
fail(testName + ":exception when creating IndexReader after disk full during close: " + e);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
int result = newReader.docFreq(searchTerm);
|
||||||
|
if (success) {
|
||||||
|
if (result != END_COUNT) {
|
||||||
|
fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// On hitting exception we still may have added
|
||||||
|
// all docs:
|
||||||
|
if (result != START_COUNT && result != END_COUNT) {
|
||||||
|
err.printStackTrace();
|
||||||
|
fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
IndexSearcher searcher = new IndexSearcher(newReader);
|
||||||
|
Hits hits = null;
|
||||||
|
try {
|
||||||
|
hits = searcher.search(new TermQuery(searchTerm));
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
fail(testName + ": exception when searching: " + e);
|
||||||
|
}
|
||||||
|
int result2 = hits.length();
|
||||||
|
if (success) {
|
||||||
|
if (result2 != END_COUNT) {
|
||||||
|
fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + END_COUNT);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// On hitting exception we still may have added
|
||||||
|
// all docs:
|
||||||
|
if (result2 != START_COUNT && result2 != END_COUNT) {
|
||||||
|
err.printStackTrace();
|
||||||
|
fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + START_COUNT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
searcher.close();
|
||||||
|
newReader.close();
|
||||||
|
|
||||||
|
if (result2 == END_COUNT) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dir.close();
|
||||||
|
|
||||||
|
// Try again with 10 more bytes of free space:
|
||||||
|
diskFree += 10;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String arrayToString(String[] l) {
|
||||||
|
String s = "";
|
||||||
|
for(int i=0;i<l.length;i++) {
|
||||||
|
if (i > 0) {
|
||||||
|
s += "\n ";
|
||||||
|
}
|
||||||
|
s += l[i];
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
private void deleteReaderReaderConflict(boolean optimize) throws IOException
|
private void deleteReaderReaderConflict(boolean optimize) throws IOException
|
||||||
{
|
{
|
||||||
Directory dir = getDirectory(true);
|
Directory dir = getDirectory(true);
|
||||||
|
@ -697,4 +910,6 @@ public class TestIndexReader extends TestCase
|
||||||
}
|
}
|
||||||
dir.delete();
|
dir.delete();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@ package org.apache.lucene.index;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
@ -10,12 +11,16 @@ import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Hits;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.FSDirectory;
|
import org.apache.lucene.store.FSDirectory;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.MockRAMDirectory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author goller
|
* @author goller
|
||||||
|
@ -88,6 +93,350 @@ public class TestIndexWriter extends TestCase
|
||||||
writer.addDocument(doc);
|
writer.addDocument(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void addDocWithIndex(IndexWriter writer, int index) throws IOException
|
||||||
|
{
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new Field("content", "aaa " + index, Field.Store.YES, Field.Index.TOKENIZED));
|
||||||
|
doc.add(new Field("id", "" + index, Field.Store.YES, Field.Index.TOKENIZED));
|
||||||
|
writer.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Test: make sure when we run out of disk space or hit
|
||||||
|
random IOExceptions in any of the addIndexes(*) calls
|
||||||
|
that 1) index is not corrupt (searcher can open/search
|
||||||
|
it) and 2) transactional semantics are followed:
|
||||||
|
either all or none of the incoming documents were in
|
||||||
|
fact added.
|
||||||
|
*/
|
||||||
|
public void testAddIndexOnDiskFull() throws IOException
|
||||||
|
{
|
||||||
|
|
||||||
|
int START_COUNT = 57;
|
||||||
|
int NUM_DIR = 50;
|
||||||
|
int END_COUNT = START_COUNT + NUM_DIR*25;
|
||||||
|
|
||||||
|
boolean debug = false;
|
||||||
|
|
||||||
|
// Build up a bunch of dirs that have indexes which we
|
||||||
|
// will then merge together by calling addIndexes(*):
|
||||||
|
Directory[] dirs = new Directory[NUM_DIR];
|
||||||
|
long inputDiskUsage = 0;
|
||||||
|
for(int i=0;i<NUM_DIR;i++) {
|
||||||
|
dirs[i] = new RAMDirectory();
|
||||||
|
IndexWriter writer = new IndexWriter(dirs[i], new WhitespaceAnalyzer(), true);
|
||||||
|
for(int j=0;j<25;j++) {
|
||||||
|
addDocWithIndex(writer, 25*i+j);
|
||||||
|
}
|
||||||
|
writer.close();
|
||||||
|
String[] files = dirs[i].list();
|
||||||
|
for(int j=0;j<files.length;j++) {
|
||||||
|
inputDiskUsage += dirs[i].fileLength(files[j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now, build a starting index that has START_COUNT docs. We
|
||||||
|
// will then try to addIndexes into a copy of this:
|
||||||
|
RAMDirectory startDir = new RAMDirectory();
|
||||||
|
IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), true);
|
||||||
|
for(int j=0;j<START_COUNT;j++) {
|
||||||
|
addDocWithIndex(writer, j);
|
||||||
|
}
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
// Make sure starting index seems to be working properly:
|
||||||
|
Term searchTerm = new Term("content", "aaa");
|
||||||
|
IndexReader reader = IndexReader.open(startDir);
|
||||||
|
assertEquals("first docFreq", 57, reader.docFreq(searchTerm));
|
||||||
|
|
||||||
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
|
Hits hits = searcher.search(new TermQuery(searchTerm));
|
||||||
|
assertEquals("first number of hits", 57, hits.length());
|
||||||
|
searcher.close();
|
||||||
|
reader.close();
|
||||||
|
|
||||||
|
// Iterate with larger and larger amounts of free
|
||||||
|
// disk space. With little free disk space,
|
||||||
|
// addIndexes will certainly run out of space &
|
||||||
|
// fail. Verify that when this happens, index is
|
||||||
|
// not corrupt and index in fact has added no
|
||||||
|
// documents. Then, we increase disk space by 1000
|
||||||
|
// bytes each iteration. At some point there is
|
||||||
|
// enough free disk space and addIndexes should
|
||||||
|
// succeed and index should show all documents were
|
||||||
|
// added.
|
||||||
|
|
||||||
|
// String[] files = startDir.list();
|
||||||
|
long diskUsage = startDir.sizeInBytes();
|
||||||
|
|
||||||
|
long startDiskUsage = 0;
|
||||||
|
String[] files = startDir.list();
|
||||||
|
for(int i=0;i<files.length;i++) {
|
||||||
|
startDiskUsage += startDir.fileLength(files[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int method=0;method<3;method++) {
|
||||||
|
|
||||||
|
// Start with 100 bytes more than we are currently using:
|
||||||
|
long diskFree = diskUsage+100;
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
boolean done = false;
|
||||||
|
|
||||||
|
String methodName;
|
||||||
|
if (0 == method) {
|
||||||
|
methodName = "addIndexes(Directory[])";
|
||||||
|
} else if (1 == method) {
|
||||||
|
methodName = "addIndexes(IndexReader[])";
|
||||||
|
} else {
|
||||||
|
methodName = "addIndexesNoOptimize(Directory[])";
|
||||||
|
}
|
||||||
|
|
||||||
|
String testName = "disk full test for method " + methodName + " with disk full at " + diskFree + " bytes";
|
||||||
|
|
||||||
|
int cycleCount = 0;
|
||||||
|
|
||||||
|
while(!done) {
|
||||||
|
|
||||||
|
cycleCount++;
|
||||||
|
|
||||||
|
// Make a new dir that will enforce disk usage:
|
||||||
|
MockRAMDirectory dir = new MockRAMDirectory(startDir);
|
||||||
|
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
|
||||||
|
IOException err = null;
|
||||||
|
|
||||||
|
for(int x=0;x<2;x++) {
|
||||||
|
|
||||||
|
// Two loops: first time, limit disk space &
|
||||||
|
// throw random IOExceptions; second time, no
|
||||||
|
// disk space limit:
|
||||||
|
|
||||||
|
double rate = 0.05;
|
||||||
|
double diskRatio = ((double) diskFree)/diskUsage;
|
||||||
|
long thisDiskFree;
|
||||||
|
|
||||||
|
if (0 == x) {
|
||||||
|
thisDiskFree = diskFree;
|
||||||
|
if (diskRatio >= 2.0) {
|
||||||
|
rate /= 2;
|
||||||
|
}
|
||||||
|
if (diskRatio >= 4.0) {
|
||||||
|
rate /= 2;
|
||||||
|
}
|
||||||
|
if (diskRatio >= 6.0) {
|
||||||
|
rate = 0.0;
|
||||||
|
}
|
||||||
|
if (debug) {
|
||||||
|
System.out.println("\ncycle: " + methodName + ": " + diskFree + " bytes");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
thisDiskFree = 0;
|
||||||
|
rate = 0.0;
|
||||||
|
if (debug) {
|
||||||
|
System.out.println("\ncycle: " + methodName + ", same writer: unlimited disk space");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dir.setMaxSizeInBytes(thisDiskFree);
|
||||||
|
dir.setRandomIOExceptionRate(rate, diskFree);
|
||||||
|
|
||||||
|
try {
|
||||||
|
|
||||||
|
if (0 == method) {
|
||||||
|
writer.addIndexes(dirs);
|
||||||
|
} else if (1 == method) {
|
||||||
|
IndexReader readers[] = new IndexReader[dirs.length];
|
||||||
|
for(int i=0;i<dirs.length;i++) {
|
||||||
|
readers[i] = IndexReader.open(dirs[i]);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
writer.addIndexes(readers);
|
||||||
|
} finally {
|
||||||
|
for(int i=0;i<dirs.length;i++) {
|
||||||
|
readers[i].close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
writer.addIndexesNoOptimize(dirs);
|
||||||
|
}
|
||||||
|
|
||||||
|
success = true;
|
||||||
|
if (debug) {
|
||||||
|
System.out.println(" success!");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (0 == x) {
|
||||||
|
done = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
success = false;
|
||||||
|
err = e;
|
||||||
|
if (debug) {
|
||||||
|
System.out.println(" hit IOException: " + e);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (1 == x) {
|
||||||
|
e.printStackTrace();
|
||||||
|
fail(methodName + " hit IOException after disk space was freed up");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Whether we succeeded or failed, check that all
|
||||||
|
// un-referenced files were in fact deleted (ie,
|
||||||
|
// we did not create garbage). Just create a
|
||||||
|
// new IndexFileDeleter, have it delete
|
||||||
|
// unreferenced files, then verify that in fact
|
||||||
|
// no files were deleted:
|
||||||
|
String[] startFiles = dir.list();
|
||||||
|
SegmentInfos infos = new SegmentInfos();
|
||||||
|
infos.read(dir);
|
||||||
|
IndexFileDeleter d = new IndexFileDeleter(infos, dir);
|
||||||
|
d.findDeletableFiles();
|
||||||
|
d.deleteFiles();
|
||||||
|
String[] endFiles = dir.list();
|
||||||
|
|
||||||
|
Arrays.sort(startFiles);
|
||||||
|
Arrays.sort(endFiles);
|
||||||
|
|
||||||
|
/*
|
||||||
|
for(int i=0;i<startFiles.length;i++) {
|
||||||
|
System.out.println(" " + i + ": " + startFiles[i]);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (!Arrays.equals(startFiles, endFiles)) {
|
||||||
|
String successStr;
|
||||||
|
if (success) {
|
||||||
|
successStr = "success";
|
||||||
|
} else {
|
||||||
|
successStr = "IOException";
|
||||||
|
err.printStackTrace();
|
||||||
|
}
|
||||||
|
fail(methodName + " failed to delete unreferenced files after " + successStr + " (" + diskFree + " bytes): before delete:\n " + arrayToString(startFiles) + "\n after delete:\n " + arrayToString(endFiles));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (debug) {
|
||||||
|
System.out.println(" now test readers");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Finally, verify index is not corrupt, and, if
|
||||||
|
// we succeeded, we see all docs added, and if we
|
||||||
|
// failed, we see either all docs or no docs added
|
||||||
|
// (transactional semantics):
|
||||||
|
try {
|
||||||
|
reader = IndexReader.open(dir);
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
fail(testName + ": exception when creating IndexReader: " + e);
|
||||||
|
}
|
||||||
|
int result = reader.docFreq(searchTerm);
|
||||||
|
if (success) {
|
||||||
|
if (result != END_COUNT) {
|
||||||
|
fail(testName + ": method did not throw exception but docFreq('aaa') is " + result + " instead of expected " + END_COUNT);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// On hitting exception we still may have added
|
||||||
|
// all docs:
|
||||||
|
if (result != START_COUNT && result != END_COUNT) {
|
||||||
|
err.printStackTrace();
|
||||||
|
fail(testName + ": method did throw exception but docFreq('aaa') is " + result + " instead of expected " + START_COUNT + " or " + END_COUNT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
searcher = new IndexSearcher(reader);
|
||||||
|
try {
|
||||||
|
hits = searcher.search(new TermQuery(searchTerm));
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
fail(testName + ": exception when searching: " + e);
|
||||||
|
}
|
||||||
|
int result2 = hits.length();
|
||||||
|
if (success) {
|
||||||
|
if (result2 != result) {
|
||||||
|
fail(testName + ": method did not throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// On hitting exception we still may have added
|
||||||
|
// all docs:
|
||||||
|
if (result2 != result) {
|
||||||
|
err.printStackTrace();
|
||||||
|
fail(testName + ": method did throw exception but hits.length for search on term 'aaa' is " + result2 + " instead of expected " + result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
searcher.close();
|
||||||
|
reader.close();
|
||||||
|
if (debug) {
|
||||||
|
System.out.println(" count is " + result);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result == END_COUNT) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Javadocs state that temp free Directory space
|
||||||
|
// required is at most 2X total input size of
|
||||||
|
// indices so let's make sure:
|
||||||
|
assertTrue("max free Directory space required exceeded 1X the total input index sizes during " + methodName +
|
||||||
|
": max temp usage = " + (dir.getMaxUsedSizeInBytes()-startDiskUsage) + " bytes; " +
|
||||||
|
"starting disk usage = " + startDiskUsage + " bytes; " +
|
||||||
|
"input index disk usage = " + inputDiskUsage + " bytes",
|
||||||
|
(dir.getMaxUsedSizeInBytes()-startDiskUsage) < 2*(startDiskUsage + inputDiskUsage));
|
||||||
|
|
||||||
|
writer.close();
|
||||||
|
dir.close();
|
||||||
|
|
||||||
|
// Try again with 1000 more bytes of free space:
|
||||||
|
diskFree += 1000;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
startDir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Make sure optimize doesn't use any more than 1X
|
||||||
|
* starting index size as its temporary free space
|
||||||
|
* required.
|
||||||
|
*/
|
||||||
|
public void testOptimizeTempSpaceUsage() throws IOException {
|
||||||
|
|
||||||
|
MockRAMDirectory dir = new MockRAMDirectory();
|
||||||
|
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
|
||||||
|
for(int j=0;j<500;j++) {
|
||||||
|
addDocWithIndex(writer, j);
|
||||||
|
}
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
long startDiskUsage = 0;
|
||||||
|
String[] files = dir.list();
|
||||||
|
for(int i=0;i<files.length;i++) {
|
||||||
|
startDiskUsage += dir.fileLength(files[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
dir.resetMaxUsedSizeInBytes();
|
||||||
|
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
|
||||||
|
writer.optimize();
|
||||||
|
writer.close();
|
||||||
|
long maxDiskUsage = dir.getMaxUsedSizeInBytes();
|
||||||
|
|
||||||
|
assertTrue("optimized used too much temporary space: starting usage was " + startDiskUsage + " bytes; max temp usage was " + maxDiskUsage + " but should have been " + (2*startDiskUsage) + " (= 2X starting usage)",
|
||||||
|
maxDiskUsage <= 2*startDiskUsage);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String arrayToString(String[] l) {
|
||||||
|
String s = "";
|
||||||
|
for(int i=0;i<l.length;i++) {
|
||||||
|
if (i > 0) {
|
||||||
|
s += "\n ";
|
||||||
|
}
|
||||||
|
s += l[i];
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
// Make sure we can open an index for create even when a
|
// Make sure we can open an index for create even when a
|
||||||
// reader holds it open (this fails pre lock-less
|
// reader holds it open (this fails pre lock-less
|
||||||
// commits on windows):
|
// commits on windows):
|
||||||
|
@ -276,3 +625,5 @@ public class TestIndexWriter extends TestCase
|
||||||
dir.delete();
|
dir.delete();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.lucene.store.FSDirectory;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.apache.lucene.util.English;
|
import org.apache.lucene.util.English;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.MockRAMDirectory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* JUnit testcase to test RAMDirectory. RAMDirectory itself is used in many testcases,
|
* JUnit testcase to test RAMDirectory. RAMDirectory itself is used in many testcases,
|
||||||
* but not one of them uses an different constructor other than the default constructor.
|
* but not one of them uses an different constructor other than the default constructor.
|
||||||
|
@ -70,7 +72,7 @@ public class TestRAMDirectory extends TestCase {
|
||||||
public void testRAMDirectory () throws IOException {
|
public void testRAMDirectory () throws IOException {
|
||||||
|
|
||||||
Directory dir = FSDirectory.getDirectory(indexDir, false);
|
Directory dir = FSDirectory.getDirectory(indexDir, false);
|
||||||
RAMDirectory ramDir = new RAMDirectory(dir);
|
MockRAMDirectory ramDir = new MockRAMDirectory(dir);
|
||||||
|
|
||||||
// close the underlaying directory
|
// close the underlaying directory
|
||||||
dir.close();
|
dir.close();
|
||||||
|
@ -98,7 +100,7 @@ public class TestRAMDirectory extends TestCase {
|
||||||
|
|
||||||
public void testRAMDirectoryFile () throws IOException {
|
public void testRAMDirectoryFile () throws IOException {
|
||||||
|
|
||||||
RAMDirectory ramDir = new RAMDirectory(indexDir);
|
MockRAMDirectory ramDir = new MockRAMDirectory(indexDir);
|
||||||
|
|
||||||
// Check size
|
// Check size
|
||||||
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
|
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
|
||||||
|
@ -123,7 +125,7 @@ public class TestRAMDirectory extends TestCase {
|
||||||
|
|
||||||
public void testRAMDirectoryString () throws IOException {
|
public void testRAMDirectoryString () throws IOException {
|
||||||
|
|
||||||
RAMDirectory ramDir = new RAMDirectory(indexDir.getCanonicalPath());
|
MockRAMDirectory ramDir = new MockRAMDirectory(indexDir.getCanonicalPath());
|
||||||
|
|
||||||
// Check size
|
// Check size
|
||||||
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
|
assertEquals(ramDir.sizeInBytes(), ramDir.getRecomputedSizeInBytes());
|
||||||
|
@ -151,7 +153,7 @@ public class TestRAMDirectory extends TestCase {
|
||||||
|
|
||||||
public void testRAMDirectorySize() throws IOException, InterruptedException {
|
public void testRAMDirectorySize() throws IOException, InterruptedException {
|
||||||
|
|
||||||
final RAMDirectory ramDir = new RAMDirectory(indexDir.getCanonicalPath());
|
final MockRAMDirectory ramDir = new MockRAMDirectory(indexDir.getCanonicalPath());
|
||||||
final IndexWriter writer = new IndexWriter(ramDir, new WhitespaceAnalyzer(), false);
|
final IndexWriter writer = new IndexWriter(ramDir, new WhitespaceAnalyzer(), false);
|
||||||
writer.optimize();
|
writer.optimize();
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,130 @@
|
||||||
|
package org.apache.lucene.store;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a subclass of RAMDirectory that adds methods
|
||||||
|
* intented to be used only by unit tests.
|
||||||
|
* @version $Id: RAMDirectory.java 437897 2006-08-29 01:13:10Z yonik $
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class MockRAMDirectory extends RAMDirectory {
|
||||||
|
long maxSize;
|
||||||
|
|
||||||
|
// Max actual bytes used. This is set by MockRAMOutputStream:
|
||||||
|
long maxUsedSize;
|
||||||
|
double randomIOExceptionRate;
|
||||||
|
Random randomState;
|
||||||
|
|
||||||
|
public MockRAMDirectory() throws IOException {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
public MockRAMDirectory(String dir) throws IOException {
|
||||||
|
super(dir);
|
||||||
|
}
|
||||||
|
public MockRAMDirectory(Directory dir) throws IOException {
|
||||||
|
super(dir);
|
||||||
|
}
|
||||||
|
public MockRAMDirectory(File dir) throws IOException {
|
||||||
|
super(dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMaxSizeInBytes(long maxSize) {
|
||||||
|
this.maxSize = maxSize;
|
||||||
|
}
|
||||||
|
public long getMaxSizeInBytes() {
|
||||||
|
return this.maxSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the peek actual storage used (bytes) in this
|
||||||
|
* directory.
|
||||||
|
*/
|
||||||
|
public long getMaxUsedSizeInBytes() {
|
||||||
|
return this.maxUsedSize;
|
||||||
|
}
|
||||||
|
public void resetMaxUsedSizeInBytes() {
|
||||||
|
this.maxUsedSize = getRecomputedActualSizeInBytes();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If 0.0, no exceptions will be thrown. Else this should
|
||||||
|
* be a double 0.0 - 1.0. We will randomly throw an
|
||||||
|
* IOException on the first write to an OutputStream based
|
||||||
|
* on this probability.
|
||||||
|
*/
|
||||||
|
public void setRandomIOExceptionRate(double rate, long seed) {
|
||||||
|
randomIOExceptionRate = rate;
|
||||||
|
// seed so we have deterministic behaviour:
|
||||||
|
randomState = new Random(seed);
|
||||||
|
}
|
||||||
|
public double getRandomIOExceptionRate() {
|
||||||
|
return randomIOExceptionRate;
|
||||||
|
}
|
||||||
|
|
||||||
|
void maybeThrowIOException() throws IOException {
|
||||||
|
if (randomIOExceptionRate > 0.0) {
|
||||||
|
int number = Math.abs(randomState.nextInt() % 1000);
|
||||||
|
if (number < randomIOExceptionRate*1000) {
|
||||||
|
throw new IOException("a random IOException");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public IndexOutput createOutput(String name) {
|
||||||
|
RAMFile file = new RAMFile(this);
|
||||||
|
synchronized (this) {
|
||||||
|
RAMFile existing = (RAMFile)fileMap.get(name);
|
||||||
|
if (existing!=null) {
|
||||||
|
sizeInBytes -= existing.sizeInBytes;
|
||||||
|
existing.directory = null;
|
||||||
|
}
|
||||||
|
fileMap.put(name, file);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new MockRAMOutputStream(this, file);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Provided for testing purposes. Use sizeInBytes() instead. */
|
||||||
|
public synchronized final long getRecomputedSizeInBytes() {
|
||||||
|
long size = 0;
|
||||||
|
Iterator it = files.iterator();
|
||||||
|
while (it.hasNext())
|
||||||
|
size += ((RAMFile) it.next()).getSizeInBytes();
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Like getRecomputedSizeInBytes(), but, uses actual file
|
||||||
|
* lengths rather than buffer allocations (which are
|
||||||
|
* quantized up to nearest
|
||||||
|
* BufferedIndexOutput.BUFFER_SIZE (now 1024) bytes.
|
||||||
|
*/
|
||||||
|
|
||||||
|
final long getRecomputedActualSizeInBytes() {
|
||||||
|
long size = 0;
|
||||||
|
Iterator it = files.iterator();
|
||||||
|
while (it.hasNext())
|
||||||
|
size += ((RAMFile) it.next()).length;
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,83 @@
|
||||||
|
package org.apache.lucene.store;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used by MockRAMDirectory to create an output stream that
|
||||||
|
* will throw an IOException on fake disk full, track max
|
||||||
|
* disk space actually used, and maybe throw random
|
||||||
|
* IOExceptions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class MockRAMOutputStream extends RAMOutputStream {
|
||||||
|
private MockRAMDirectory dir;
|
||||||
|
private boolean first=true;
|
||||||
|
|
||||||
|
/** Construct an empty output buffer. */
|
||||||
|
public MockRAMOutputStream(MockRAMDirectory dir, RAMFile f) {
|
||||||
|
super(f);
|
||||||
|
this.dir = dir;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() throws IOException {
|
||||||
|
super.close();
|
||||||
|
|
||||||
|
// Now compute actual disk usage & track the maxUsedSize
|
||||||
|
// in the MockRAMDirectory:
|
||||||
|
long size = dir.getRecomputedActualSizeInBytes();
|
||||||
|
if (size > dir.maxUsedSize) {
|
||||||
|
dir.maxUsedSize = size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void flushBuffer(byte[] src, int len) throws IOException {
|
||||||
|
long freeSpace = dir.maxSize - dir.sizeInBytes();
|
||||||
|
long realUsage = 0;
|
||||||
|
|
||||||
|
// Enforce disk full:
|
||||||
|
if (dir.maxSize != 0 && freeSpace <= len) {
|
||||||
|
// Compute the real disk free. This will greatly slow
|
||||||
|
// down our test but makes it more accurate:
|
||||||
|
realUsage = dir.getRecomputedActualSizeInBytes();
|
||||||
|
freeSpace = dir.maxSize - realUsage;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dir.maxSize != 0 && freeSpace <= len) {
|
||||||
|
if (freeSpace > 0 && freeSpace < len) {
|
||||||
|
realUsage += freeSpace;
|
||||||
|
super.flushBuffer(src, (int) freeSpace);
|
||||||
|
}
|
||||||
|
if (realUsage > dir.maxUsedSize) {
|
||||||
|
dir.maxUsedSize = realUsage;
|
||||||
|
}
|
||||||
|
throw new IOException("fake disk full at " + dir.sizeInBytes() + " bytes");
|
||||||
|
} else {
|
||||||
|
super.flushBuffer(src, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (first) {
|
||||||
|
// Maybe throw random exception; only do this on first
|
||||||
|
// write to a new file:
|
||||||
|
first = false;
|
||||||
|
dir.maybeThrowIOException();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue