mirror of https://github.com/apache/lucene.git
LUCENE-325: add expungeDeletes methods to IndexWriter
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@620604 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 862c44215a
commit 1d4ab68796

CHANGES.txt
@@ -49,6 +49,12 @@ API Changes
    see the changes.  Deprecate IndexWriter.flush() in favor of
    IndexWriter.commit().  (Mike McCandless)
 
+ 5. LUCENE-325: Added IndexWriter.expungeDeletes methods, which
+    consult the MergePolicy to find merges necessary to merge away all
+    deletes from the index.  This should be a somewhat lower cost
+    operation than optimize.  (John Wang via Mike McCandless)
+
 Bug fixes
 
  1. LUCENE-1134: Fixed BooleanQuery.rewrite to only optimize a single
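
For readers landing on this entry, here is a minimal usage sketch of the new API. This is my own illustration, not part of the commit; it mirrors the constructor and field style of the tests added below, with RAMDirectory and StandardAnalyzer chosen only for brevity:

    import java.io.IOException;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class ExpungeDeletesExample {
      public static void main(String[] args) throws IOException {
        Directory dir = new RAMDirectory();

        // Index a handful of documents.
        IndexWriter writer = new IndexWriter(dir, false, new StandardAnalyzer(),
                                             IndexWriter.MaxFieldLength.LIMITED);
        for (int i = 0; i < 10; i++) {
          Document doc = new Document();
          doc.add(new Field("id", "" + i, Field.Store.YES, Field.Index.UN_TOKENIZED));
          writer.addDocument(doc);
        }
        writer.close();

        // Delete a couple of them through an IndexReader.
        IndexReader reader = IndexReader.open(dir);
        reader.deleteDocument(0);
        reader.deleteDocument(7);
        reader.close();

        // Merge the deletes away without forcing the index down to a
        // single segment, as optimize() would.
        writer = new IndexWriter(dir, false, new StandardAnalyzer(),
                                 IndexWriter.MaxFieldLength.LIMITED);
        writer.expungeDeletes();  // blocks until the merges complete
        writer.close();
      }
    }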

IndexWriter.java
@@ -26,7 +26,6 @@ import org.apache.lucene.store.Lock;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.util.BitVector;
-import org.apache.lucene.util.Parameter;
 import org.apache.lucene.util.Constants;
 
 import java.io.File;

@@ -2163,6 +2162,7 @@ public class IndexWriter {
           try {
             wait();
           } catch (InterruptedException ie) {
+            Thread.currentThread().interrupt();
           }
 
     if (mergeExceptions.size() > 0) {
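
The one-line change above restores the thread's interrupt status after catching InterruptedException inside a wait loop, so code further up the stack can still observe the interrupt. A generic sketch of the idiom (standalone illustration, not Lucene code):

    class InterruptIdiom {
      private final Object lock = new Object();
      private boolean done;

      void awaitDone() {
        synchronized (lock) {
          while (!done) {
            try {
              lock.wait();
            } catch (InterruptedException ie) {
              // Swallowing the exception would silently clear the thread's
              // interrupt flag; re-assert it so callers can still see it.
              Thread.currentThread().interrupt();
              return;
            }
          }
        }
      }
    }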

@@ -2205,6 +2205,87 @@ public class IndexWriter {
     return false;
   }
 
+  /** Just like {@link #expungeDeletes()}, except you can
+   *  specify whether the call should block until the
+   *  operation completes.  This is only meaningful with a
+   *  {@link MergeScheduler} that is able to run merges in
+   *  background threads. */
+  public void expungeDeletes(boolean doWait)
+    throws CorruptIndexException, IOException {
+    ensureOpen();
+
+    if (infoStream != null)
+      message("expungeDeletes: index now " + segString());
+
+    MergePolicy.MergeSpecification spec;
+
+    synchronized(this) {
+      spec = mergePolicy.findMergesToExpungeDeletes(segmentInfos, this);
+      if (spec != null) {
+        final int numMerges = spec.merges.size();
+        for(int i=0;i<numMerges;i++)
+          registerMerge((MergePolicy.OneMerge) spec.merges.get(i));
+      }
+    }
+
+    mergeScheduler.merge(this);
+
+    if (spec != null && doWait) {
+      final int numMerges = spec.merges.size();
+      synchronized(this) {
+        boolean running = true;
+        while(running) {
+
+          running = false;
+          for(int i=0;i<numMerges;i++) {
+            final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) spec.merges.get(i);
+            if (pendingMerges.contains(merge) || runningMerges.contains(merge))
+              running = true;
+            Throwable t = merge.getException();
+            if (t != null) {
+              IOException ioe = new IOException("background merge hit exception: " + merge.segString(directory));
+              ioe.initCause(t);
+              throw ioe;
+            }
+          }
+
+          if (running) {
+            try {
+              wait();
+            } catch (InterruptedException ie) {
+              Thread.currentThread().interrupt();
+            }
+          }
+        }
+      }
+    }
+
+    // NOTE: in the ConcurrentMergeScheduler case, when
+    // doWait is false, we can return immediately while
+    // background threads accomplish the optimization
+  }
+
+  /** Expunges all deletes from the index.  When an index
+   *  has many document deletions (or updates to existing
+   *  documents), it's best to either call optimize or
+   *  expungeDeletes to remove all unused data in the index
+   *  associated with the deleted documents.  To see how
+   *  many deletions you have pending in your index, call
+   *  {@link IndexReader#maxDoc} - {@link IndexReader#numDocs}.
+   *  This saves disk space and memory usage while
+   *  searching.  expungeDeletes should be somewhat faster
+   *  than optimize since it does not insist on reducing the
+   *  index to a single segment (though, this depends on the
+   *  {@link MergePolicy}; see {@link
+   *  MergePolicy#findMergesToExpungeDeletes}).  Note that
+   *  this call does not first commit any buffered
+   *  documents, so you must do so yourself if necessary.
+   *  See also {@link #expungeDeletes(boolean)}. */
+  public void expungeDeletes() throws CorruptIndexException, IOException {
+    expungeDeletes(true);
+  }
+
   /**
    * Expert: asks the mergePolicy whether any merges are
    * necessary now and if so, runs the requested merges and
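
Since expungeDeletes(false) only kicks off the merges, a caller using ConcurrentMergeScheduler gets control back immediately; the writer just needs to stay open long enough for the background threads to finish. A hedged sketch of that pattern (my own illustration; class and method names are hypothetical):

    import java.io.IOException;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.ConcurrentMergeScheduler;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;

    class BackgroundExpunge {
      // Kick off delete-expunging merges without blocking the caller.
      static void expungeInBackground(Directory dir) throws IOException {
        IndexWriter writer = new IndexWriter(dir, false, new StandardAnalyzer(),
                                             IndexWriter.MaxFieldLength.LIMITED);
        writer.setMergeScheduler(new ConcurrentMergeScheduler());  // the default anyway
        writer.expungeDeletes(false);  // returns immediately; merges run in background
        // ... other indexing work can proceed here ...
        writer.close();  // close() still waits for any in-flight merges
      }
    }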

LogMergePolicy.java
@@ -245,6 +245,54 @@ public abstract class LogMergePolicy extends MergePolicy {
     return spec;
   }
 
+  /**
+   * Finds merges necessary to expunge all deletes from the
+   * index.  We simply merge adjacent segments that have
+   * deletes, up to mergeFactor at a time.
+   */
+  public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos,
+                                                       IndexWriter writer)
+    throws CorruptIndexException, IOException
+  {
+    this.writer = writer;
+
+    final int numSegments = segmentInfos.size();
+
+    message("findMergesToExpungeDeletes: " + numSegments + " segments");
+
+    MergeSpecification spec = new MergeSpecification();
+    int firstSegmentWithDeletions = -1;
+    for(int i=0;i<numSegments;i++) {
+      final SegmentInfo info = segmentInfos.info(i);
+      if (info.hasDeletions()) {
+        message("  segment " + info.name + " has deletions");
+        if (firstSegmentWithDeletions == -1)
+          firstSegmentWithDeletions = i;
+        else if (i - firstSegmentWithDeletions == mergeFactor) {
+          // We've seen mergeFactor segments in a row with
+          // deletions, so force a merge now:
+          message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
+          spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i), useCompoundFile));
+          firstSegmentWithDeletions = i;
+        }
+      } else if (firstSegmentWithDeletions != -1) {
+        // End of a sequence of segments with deletions, so,
+        // merge those past segments even if it's fewer than
+        // mergeFactor segments
+        message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
+        spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i), useCompoundFile));
+        firstSegmentWithDeletions = -1;
+      }
+    }
+
+    if (firstSegmentWithDeletions != -1) {
+      message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
+      spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments), useCompoundFile));
+    }
+
+    return spec;
+  }
+
   /** Checks if any merges are now necessary and returns a
    *  {@link MergePolicy.MergeSpecification} if so.  A merge
    *  is necessary when there are more than {@link
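
To make the grouping rule concrete: with mergeFactor=3 and six segments where segments 0-3 and 5 have deletions, the policy emits a merge over [0,3), the singular merge [3,4) (which is what the "2 singular merges" test below exercises), and [5,6). A standalone simulation of just that selection logic (my own sketch, not part of the commit):

    import java.util.ArrayList;
    import java.util.List;

    class ExpungeGrouping {
      // Consecutive segments with deletions are merged together,
      // cut into groups of at most mergeFactor; ranges are half-open,
      // matching segmentInfos.range(first, i) above.
      static List<int[]> group(boolean[] hasDeletions, int mergeFactor) {
        List<int[]> merges = new ArrayList<int[]>();
        int first = -1;
        for (int i = 0; i < hasDeletions.length; i++) {
          if (hasDeletions[i]) {
            if (first == -1)
              first = i;
            else if (i - first == mergeFactor) {   // mergeFactor in a row: cut here
              merges.add(new int[]{first, i});
              first = i;
            }
          } else if (first != -1) {                // run ended early: merge what we saw
            merges.add(new int[]{first, i});
            first = -1;
          }
        }
        if (first != -1)
          merges.add(new int[]{first, hasDeletions.length});
        return merges;
      }

      public static void main(String[] args) {
        // Deletions in segments 0,1,2,3 and 5; mergeFactor 3
        // => prints merge [0,3), merge [3,4), merge [5,6).
        for (int[] m : group(new boolean[]{true,true,true,true,false,true}, 3))
          System.out.println("merge [" + m[0] + "," + m[1] + ")");
      }
    }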

MergePolicy.java
@@ -50,6 +50,8 @@ import java.util.Set;
  *
  * <p>The default MergePolicy is {@link
  * LogByteSizeMergePolicy}.</p>
+ * <p><b>NOTE:</b> This API is new and still experimental
+ * (subject to change suddenly in the next release)</p>
  */
 
 public abstract class MergePolicy {

@@ -209,7 +211,7 @@ public abstract class MergePolicy {
     throws CorruptIndexException, IOException;
 
   /**
-   * Determine what set of merge operations are necessary in
+   * Determine what set of merge operations is necessary in
    * order to optimize the index.  The IndexWriter calls
    * this when its optimize() method is called.  This call
    * is always synchronized on the IndexWriter instance so

@@ -229,6 +231,19 @@ public abstract class MergePolicy {
                                            Set segmentsToOptimize)
     throws CorruptIndexException, IOException;
 
+  /**
+   * Determine what set of merge operations is necessary in
+   * order to expunge all deletes from the index.
+   * @param segmentInfos the total set of segments in the index
+   * @param writer IndexWriter instance
+   */
+  MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos,
+                                                IndexWriter writer)
+    throws CorruptIndexException, IOException
+  {
+    throw new RuntimeException("not implemented");
+  }
+
   /**
    * Release all resources for the policy.
    */
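
Note the new hook gets a throwing default body rather than being declared abstract, which presumably keeps existing MergePolicy subclasses compiling; a policy opts in by overriding it, as LogMergePolicy does above. A hypothetical override (illustrative only; placed in the org.apache.lucene.index package since the signature uses package-visible types at this point in the API):

    package org.apache.lucene.index;

    import java.io.IOException;

    // Hypothetical policy for which expungeDeletes() is a no-op:
    // an empty specification means IndexWriter registers no merges.
    public class NoExpungePolicy extends LogByteSizeMergePolicy {
      public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos,
                                                           IndexWriter writer)
        throws CorruptIndexException, IOException {
        return new MergeSpecification();
      }
    }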

MergeScheduler.java
@@ -19,10 +19,13 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 
-/** Expert: {@link IndexWriter} uses an instance
+/** <p>Expert: {@link IndexWriter} uses an instance
  * implementing this interface to execute the merges
  * selected by a {@link MergePolicy}.  The default
- * MergeScheduler is {@link ConcurrentMergeScheduler}. */
+ * MergeScheduler is {@link ConcurrentMergeScheduler}.</p>
+ * <p><b>NOTE:</b> This API is new and still experimental
+ * (subject to change suddenly in the next release)</p>
+ */
 
 public abstract class MergeScheduler {

TestIndexWriter.java
@@ -2977,4 +2977,140 @@ public class TestIndexWriter extends LuceneTestCase
     reader.close();
     dir.close();
   }
+
+  // LUCENE-325: test expungeDeletes, when 2 singular merges
+  // are required
+  public void testExpungeDeletes() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    IndexWriter writer = new IndexWriter(dir,
+                                         false, new StandardAnalyzer(),
+                                         IndexWriter.MaxFieldLength.LIMITED);
+    writer.setMaxBufferedDocs(2);
+    writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+
+    Document document = new Document();
+
+    document = new Document();
+    Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                  Field.Index.NO);
+    document.add(storedField);
+    Field termVectorField = new Field("termVector", "termVector",
+                                      Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                      Field.TermVector.WITH_POSITIONS_OFFSETS);
+    document.add(termVectorField);
+    for(int i=0;i<10;i++)
+      writer.addDocument(document);
+    writer.close();
+
+    IndexReader ir = IndexReader.open(dir);
+    assertEquals(10, ir.maxDoc());
+    assertEquals(10, ir.numDocs());
+    ir.deleteDocument(0);
+    ir.deleteDocument(7);
+    assertEquals(8, ir.numDocs());
+    ir.close();
+
+    writer = new IndexWriter(dir,
+                             false, new StandardAnalyzer(),
+                             IndexWriter.MaxFieldLength.LIMITED);
+    writer.expungeDeletes();
+    writer.close();
+    ir = IndexReader.open(dir);
+    assertEquals(8, ir.maxDoc());
+    assertEquals(8, ir.numDocs());
+    ir.close();
+    dir.close();
+  }
+
+  // LUCENE-325: test expungeDeletes, when many adjacent merges are required
+  public void testExpungeDeletes2() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    IndexWriter writer = new IndexWriter(dir,
+                                         false, new StandardAnalyzer(),
+                                         IndexWriter.MaxFieldLength.LIMITED);
+    writer.setMaxBufferedDocs(2);
+    writer.setMergeFactor(50);
+    writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+
+    Document document = new Document();
+
+    document = new Document();
+    Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                  Field.Index.NO);
+    document.add(storedField);
+    Field termVectorField = new Field("termVector", "termVector",
+                                      Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                      Field.TermVector.WITH_POSITIONS_OFFSETS);
+    document.add(termVectorField);
+    for(int i=0;i<98;i++)
+      writer.addDocument(document);
+    writer.close();
+
+    IndexReader ir = IndexReader.open(dir);
+    assertEquals(98, ir.maxDoc());
+    assertEquals(98, ir.numDocs());
+    for(int i=0;i<98;i+=2)
+      ir.deleteDocument(i);
+    assertEquals(49, ir.numDocs());
+    ir.close();
+
+    writer = new IndexWriter(dir,
+                             false, new StandardAnalyzer(),
+                             IndexWriter.MaxFieldLength.LIMITED);
+    writer.setMergeFactor(3);
+    writer.expungeDeletes();
+    writer.close();
+    ir = IndexReader.open(dir);
+    assertEquals(49, ir.maxDoc());
+    assertEquals(49, ir.numDocs());
+    ir.close();
+    dir.close();
+  }
+
+  // LUCENE-325: test expungeDeletes without waiting, when
+  // many adjacent merges are required
+  public void testExpungeDeletes3() throws IOException {
+    Directory dir = new MockRAMDirectory();
+    IndexWriter writer = new IndexWriter(dir,
+                                         false, new StandardAnalyzer(),
+                                         IndexWriter.MaxFieldLength.LIMITED);
+    writer.setMaxBufferedDocs(2);
+    writer.setMergeFactor(50);
+    writer.setRAMBufferSizeMB(IndexWriter.DISABLE_AUTO_FLUSH);
+
+    Document document = new Document();
+
+    document = new Document();
+    Field storedField = new Field("stored", "stored", Field.Store.YES,
+                                  Field.Index.NO);
+    document.add(storedField);
+    Field termVectorField = new Field("termVector", "termVector",
+                                      Field.Store.NO, Field.Index.UN_TOKENIZED,
+                                      Field.TermVector.WITH_POSITIONS_OFFSETS);
+    document.add(termVectorField);
+    for(int i=0;i<98;i++)
+      writer.addDocument(document);
+    writer.close();
+
+    IndexReader ir = IndexReader.open(dir);
+    assertEquals(98, ir.maxDoc());
+    assertEquals(98, ir.numDocs());
+    for(int i=0;i<98;i+=2)
+      ir.deleteDocument(i);
+    assertEquals(49, ir.numDocs());
+    ir.close();
+
+    writer = new IndexWriter(dir,
+                             false, new StandardAnalyzer(),
+                             IndexWriter.MaxFieldLength.LIMITED);
+    // Force many merges to happen
+    writer.setMergeFactor(3);
+    writer.expungeDeletes(false);
+    writer.close();
+    ir = IndexReader.open(dir);
+    assertEquals(49, ir.maxDoc());
+    assertEquals(49, ir.numDocs());
+    ir.close();
+    dir.close();
+  }
 }