LUCENE-565: merge NewIndexModifier back into IndexWriter

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@506964 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2007-02-13 10:43:08 +00:00
parent 13eaccfd56
commit 95bab8401f
6 changed files with 318 additions and 381 deletions

CHANGES.txt

@@ -102,10 +102,10 @@ New features
     their passing unit tests.
     (Otis Gospodnetic)
-13. LUCENE-565: Added NewIndexModifier (subclass of IndexWriter) to
-    more efficiently handle updating documents (the "delete then add"
-    use case). This is intended to be an eventual replacement for the
-    existing IndexModifier. Added IndexWriter.flush() (renamed from
+13. LUCENE-565: Added methods to IndexWriter to more efficiently
+    handle updating documents (the "delete then add" use case). This
+    is intended to be an eventual replacement for the existing
+    IndexModifier. Added IndexWriter.flush() (renamed from
     flushRamSegments()) to flush all pending updates (held in RAM), to
     the Directory. (Ning Li via Mike McCandless)
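
To make the entry above concrete, here is a minimal usage sketch of the calls it describes (updateDocument, deleteDocuments and flush are the methods added by this change; the field names, values and the UpdateExample class name are made up for illustration):

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;

public class UpdateExample {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

    Document doc = new Document();
    doc.add(new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.add(new Field("content", "first version", Field.Store.NO, Field.Index.TOKENIZED));
    writer.addDocument(doc);

    // "Delete then add" in one call; the pair is atomic as seen by readers.
    Document newDoc = new Document();
    newDoc.add(new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED));
    newDoc.add(new Field("content", "second version", Field.Store.NO, Field.Index.TOKENIZED));
    writer.updateDocument(new Term("id", "42"), newDoc);

    // Delete by term; buffered until enough terms or docs accumulate, or flush() is called.
    writer.deleteDocuments(new Term("id", "7"));

    writer.flush();   // push all buffered adds and deletes to the Directory
    writer.close();
  }
}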

IndexModifier.java

@@ -27,6 +27,11 @@ import java.io.IOException;
 import java.io.PrintStream;

 /**
+ * <p>[Note that as of <b>2.1</b>, all but one of the
+ * methods in this class are available via {@link
+ * IndexWriter}. The one method that is not available is
+ * {@link #deleteDocument(int)}.]</p>
+ *
  * A class to modify an index, i.e. to delete and add documents. This
  * class hides {@link IndexReader} and {@link IndexWriter} so that you
  * do not need to care about implementation details such as that adding
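
Since the note says everything except deleteDocument(int) is now available on IndexWriter directly, here is a hypothetical before/after sketch of the same "delete by term, then add" sequence; the id field, its value and the MigrationSketch class are invented for the example, and the IndexModifier calls are the pre-existing API as I understand it:

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexModifier;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import java.io.IOException;

public class MigrationSketch {

  // Pre-2.1 style: IndexModifier internally juggles an IndexReader and an IndexWriter.
  static void updateWithIndexModifier(Directory dir, Document doc) throws IOException {
    IndexModifier modifier = new IndexModifier(dir, new WhitespaceAnalyzer(), false);
    modifier.deleteDocuments(new Term("id", "42"));
    modifier.addDocument(doc);
    modifier.close();
  }

  // 2.1 style: IndexWriter buffers the delete and the add and flushes them together.
  static void updateWithIndexWriter(Directory dir, Document doc) throws IOException {
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
    writer.updateDocument(new Term("id", "42"), doc);
    writer.close();
  }
}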

IndexWriter.java

@@ -32,6 +32,9 @@ import java.io.IOException;
 import java.io.PrintStream;
 import java.util.Vector;
 import java.util.HashSet;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map.Entry;

 /**
   An IndexWriter creates and maintains an index.
@@ -58,7 +61,16 @@ import java.util.HashSet;
   is also thrown if an IndexReader on the same directory is used to delete documents
   from the index.</p>

-  @see IndexModifier IndexModifier supports the important methods of IndexWriter plus deletion
+  <p>As of <b>2.1</b>, IndexWriter can now delete documents
+  by {@link Term} (see {@link #deleteDocuments} ) and update
+  (delete then add) documents (see {@link #updateDocument}).
+  Deletes are buffered until {@link
+  #setMaxBufferedDeleteTerms} <code>Terms</code> at which
+  point they are flushed to the index. Note that a flush
+  occurs when there are enough buffered deletes or enough
+  added documents, whichever is sooner. When a flush
+  occurs, both pending deletes and added documents are
+  flushed to the index.</p>
   */

 public class IndexWriter {
@@ -83,6 +95,11 @@ public class IndexWriter {
    */
   public final static int DEFAULT_MAX_BUFFERED_DOCS = 10;

+  /**
+   * Default value is 1000. Change using {@link #setMaxBufferedDeleteTerms(int)}.
+   */
+  public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = 1000;
+
   /**
    * Default value is {@link Integer#MAX_VALUE}. Change using {@link #setMaxMergeDocs(int)}.
    */
@@ -108,8 +125,8 @@ public class IndexWriter {
   private HashSet protectedSegments;          // segment names that should not be deleted until commit
   private SegmentInfos rollbackSegmentInfos;  // segmentInfos we will fallback to if the commit fails

-  protected SegmentInfos segmentInfos = new SegmentInfos();       // the segments
-  protected SegmentInfos ramSegmentInfos = new SegmentInfos();    // the segments in ramDirectory
+  SegmentInfos segmentInfos = new SegmentInfos();       // the segments
+  SegmentInfos ramSegmentInfos = new SegmentInfos();    // the segments in ramDirectory
   private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs

   private IndexFileDeleter deleter;
@@ -117,6 +134,16 @@ public class IndexWriter {
   private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;

+  // The max number of delete terms that can be buffered before
+  // they must be flushed to disk.
+  private int maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
+
+  // This Hashmap buffers delete terms in ram before they are applied.
+  // The key is delete term; the value is number of ram
+  // segments the term applies to.
+  private HashMap bufferedDeleteTerms = new HashMap();
+  private int numBufferedDeleteTerms = 0;
+
   /** Use compound file setting. Defaults to true, minimizing the number of
    * files used. Setting this to false may improve indexing performance, but
    * may also cause file handle problems.
@@ -125,10 +152,6 @@ public class IndexWriter {
   private boolean closeDir;

-  protected IndexFileDeleter getDeleter() {
-    return deleter;
-  }
-
   /** Get the current setting of whether to use the compound file format.
    * Note that this just returns the value you set with setUseCompoundFile(boolean)
    * or the default. You cannot use this to query the status of an existing index.
@@ -440,6 +463,28 @@ public class IndexWriter {
     return minMergeDocs;
   }

+  /**
+   * <p>Determines the minimal number of delete terms required before the buffered
+   * in-memory delete terms are applied and flushed. If there are documents
+   * buffered in memory at the time, they are merged and a new segment is
+   * created.</p>
+   * <p>The default value is {@link #DEFAULT_MAX_BUFFERED_DELETE_TERMS}.
+   * @throws IllegalArgumentException if maxBufferedDeleteTerms is smaller than 1</p>
+   */
+  public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
+    if (maxBufferedDeleteTerms < 1)
+      throw new IllegalArgumentException("maxBufferedDeleteTerms must at least be 1");
+    this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
+  }
+
+  /**
+   * @see #setMaxBufferedDeleteTerms
+   */
+  public int getMaxBufferedDeleteTerms() {
+    return maxBufferedDeleteTerms;
+  }
+
   /** Determines how often segment indices are merged by addDocument(). With
    * smaller values, less RAM is used while indexing, and searches on
    * unoptimized indices are faster, but indexing speed is slower. With larger
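
A small configuration sketch of how this knob interacts with maxBufferedDocs: whichever threshold is reached first triggers a flush, and a flush writes both the buffered adds and the buffered deletes. The concrete values, loop and BufferTuning class name below are made up for illustration:

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;

public class BufferTuning {
  public static void main(String[] args) throws Exception {
    IndexWriter writer = new IndexWriter(new RAMDirectory(), new WhitespaceAnalyzer(), true);

    // Flush when 100 added documents *or* 20 buffered delete terms accumulate,
    // whichever happens first.
    writer.setMaxBufferedDocs(100);
    writer.setMaxBufferedDeleteTerms(20);

    for (int i = 0; i < 1000; i++) {
      Document doc = new Document();
      doc.add(new Field("id", Integer.toString(i), Field.Store.YES, Field.Index.UN_TOKENIZED));
      writer.addDocument(doc);
      if (i % 10 == 0) {
        writer.deleteDocuments(new Term("id", Integer.toString(i / 2)));
      }
    }

    writer.close();   // close() also flushes whatever is still buffered
  }
}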
@@ -653,21 +698,73 @@ public class IndexWriter {
     }
   }

-  final SegmentInfo buildSingleDocSegment(Document doc, Analyzer analyzer)
+  SegmentInfo buildSingleDocSegment(Document doc, Analyzer analyzer)
       throws IOException {
     DocumentWriter dw = new DocumentWriter(ramDirectory, analyzer, this);
     dw.setInfoStream(infoStream);
-    String segmentName = newRAMSegmentName();
+    String segmentName = newRamSegmentName();
     dw.addDocument(segmentName, doc);
     return new SegmentInfo(segmentName, 1, ramDirectory, false, false);
   }

-  // for test purpose
-  final synchronized int getRAMSegmentCount() {
-    return ramSegmentInfos.size();
-  }
-
-  final synchronized String newRAMSegmentName() {
+  /**
+   * Deletes the document(s) containing <code>term</code>.
+   * @param term the term to identify the documents to be deleted
+   */
+  public synchronized void deleteDocuments(Term term) throws IOException {
+    bufferDeleteTerm(term);
+    maybeFlushRamSegments();
+  }
+
+  /**
+   * Deletes the document(s) containing any of the
+   * terms. All deletes are flushed at the same time.
+   * @param terms array of terms to identify the documents
+   * to be deleted
+   */
+  public synchronized void deleteDocuments(Term[] terms) throws IOException {
+    for (int i = 0; i < terms.length; i++) {
+      bufferDeleteTerm(terms[i]);
+    }
+    maybeFlushRamSegments();
+  }
+
+  /**
+   * Updates a document by first deleting the document(s)
+   * containing <code>term</code> and then adding the new
+   * document. The delete and then add are atomic as seen
+   * by a reader on the same index (flush may happen only after
+   * the add).
+   * @param term the term to identify the document(s) to be
+   * deleted
+   * @param doc the document to be added
+   */
+  public void updateDocument(Term term, Document doc) throws IOException {
+    updateDocument(term, doc, getAnalyzer());
+  }
+
+  /**
+   * Updates a document by first deleting the document(s)
+   * containing <code>term</code> and then adding the new
+   * document. The delete and then add are atomic as seen
+   * by a reader on the same index (flush may happen only after
+   * the add).
+   * @param term the term to identify the document(s) to be
+   * deleted
+   * @param doc the document to be added
+   * @param analyzer the analyzer to use when analyzing the document
+   */
+  public void updateDocument(Term term, Document doc, Analyzer analyzer)
+      throws IOException {
+    SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer);
+    synchronized (this) {
+      bufferDeleteTerm(term);
+      ramSegmentInfos.addElement(newSegmentInfo);
+      maybeFlushRamSegments();
+    }
+  }
+
+  final synchronized String newRamSegmentName() {
     return "_ram_" + Integer.toString(ramSegmentInfos.counter++, Character.MAX_RADIX);
   }
@@ -676,6 +773,11 @@ public class IndexWriter {
     return segmentInfos.size();
   }

+  // for test purpose
+  final synchronized int getRamSegmentCount() {
+    return ramSegmentInfos.size();
+  }
+
   // for test purpose
   final synchronized int getDocCount(int i) {
     if (i >= 0 && i < segmentInfos.size()) {
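
The updateDocument javadoc above promises that the delete and the add are atomic as seen by a reader. A hypothetical test-style sketch of what that means in practice (the id/version fields and the UpdateAtomicity class are invented; a reader opened after the flush sees exactly one live document for the term, never a deleted-but-not-yet-readded state):

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.store.RAMDirectory;

public class UpdateAtomicity {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

    Document v1 = new Document();
    v1.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
    v1.add(new Field("version", "one", Field.Store.YES, Field.Index.UN_TOKENIZED));
    writer.addDocument(v1);
    writer.flush();

    Document v2 = new Document();
    v2.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
    v2.add(new Field("version", "two", Field.Store.YES, Field.Index.UN_TOKENIZED));
    writer.updateDocument(new Term("id", "1"), v2);   // buffered delete + buffered add
    writer.flush();                                   // both become visible together

    IndexReader reader = IndexReader.open(dir);
    TermDocs docs = reader.termDocs(new Term("id", "1"));
    int live = 0;
    while (docs.next()) live++;   // termDocs skips deleted documents
    docs.close();
    System.out.println("live docs for id:1 = " + live);   // expected: 1
    reader.close();
    writer.close();
  }
}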
@@ -1228,40 +1330,32 @@ public class IndexWriter {
   // counts x and y, then f(x) >= f(y).
   // 2: The number of committed segments on the same level (f(n)) <= M.

-  protected boolean timeToFlushRam() {
-    return ramSegmentInfos.size() >= minMergeDocs;
-  }
-
-  protected boolean anythingToFlushRam() {
-    return ramSegmentInfos.size() > 0;
-  }
-
-  // true if only buffered inserts, no buffered deletes
-  protected boolean onlyRamDocsToFlush() {
-    return true;
-  }
-
-  // whether the latest segment is the flushed merge of ram segments
-  protected void doAfterFlushRamSegments(boolean flushedRamSegments)
-    throws IOException {
+  // This is called after pending added and deleted
+  // documents have been flushed to the Directory but before
+  // the change is committed (new segments_N file written).
+  void doAfterFlush()
+    throws IOException {
   }

   protected final void maybeFlushRamSegments() throws IOException {
-    if (timeToFlushRam()) {
+    // A flush is triggered if enough new documents are buffered or
+    // if enough delete terms are buffered
+    if (ramSegmentInfos.size() >= minMergeDocs || numBufferedDeleteTerms >= maxBufferedDeleteTerms) {
       flushRamSegments();
     }
   }

   /** Expert: Flushes all RAM-resident segments (buffered documents), then may merge segments. */
   private final synchronized void flushRamSegments() throws IOException {
-    if (anythingToFlushRam()) {
+    if (ramSegmentInfos.size() > 0 || bufferedDeleteTerms.size() > 0) {
       mergeSegments(ramSegmentInfos, 0, ramSegmentInfos.size());
       maybeMergeSegments(minMergeDocs);
     }
   }

   /**
-   * Flush all in-memory buffered updates to the Directory.
+   * Flush all in-memory buffered updates (adds and deletes)
+   * to the Directory.
    * @throws IOException
    */
   public final synchronized void flush() throws IOException {
@@ -1350,7 +1444,9 @@ public class IndexWriter {
   private final int mergeSegments(SegmentInfos sourceSegments, int minSegment, int end)
     throws IOException {

-    boolean mergeFlag = end > 0;
+    // We may be called solely because there are deletes
+    // pending, in which case doMerge is false:
+    boolean doMerge = end > 0;
     final String mergedName = newSegmentName();
     SegmentMerger merger = null;
@@ -1366,21 +1462,21 @@ public class IndexWriter {
     // This is try/finally to make sure merger's readers are closed:
     try {

-      if (mergeFlag) {
+      if (doMerge) {
         if (infoStream != null) infoStream.print("merging segments");
         merger = new SegmentMerger(this, mergedName);

         for (int i = minSegment; i < end; i++) {
           SegmentInfo si = sourceSegments.info(i);
           if (infoStream != null)
             infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
           IndexReader reader = SegmentReader.get(si); // no need to set deleter (yet)
           merger.add(reader);
           if ((reader.directory() == this.directory) || // if we own the directory
               (reader.directory() == this.ramDirectory))
             segmentsToDelete.addElement(reader);   // queue segment for deletion
         }
       }

       SegmentInfos rollback = null;
       boolean success = false;
@@ -1389,40 +1485,41 @@ public class IndexWriter {
       // if we hit exception when doing the merge:
       try {

-        if (mergeFlag) {
+        if (doMerge) {
           mergedDocCount = merger.merge();

           if (infoStream != null) {
             infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
           }

           newSegment = new SegmentInfo(mergedName, mergedDocCount,
                                        directory, false, true);
         }

         if (!inTransaction
-            && (sourceSegments != ramSegmentInfos || !onlyRamDocsToFlush())) {
+            && (sourceSegments != ramSegmentInfos || bufferedDeleteTerms.size() > 0)) {
           // Now save the SegmentInfo instances that
           // we are replacing:
           rollback = (SegmentInfos) segmentInfos.clone();
         }

-        if (mergeFlag) {
+        if (doMerge) {
           if (sourceSegments == ramSegmentInfos) {
             segmentInfos.addElement(newSegment);
           } else {
             for (int i = end-1; i > minSegment; i--)     // remove old infos & add new
               sourceSegments.remove(i);
             segmentInfos.set(minSegment, newSegment);
           }
         }

         if (sourceSegments == ramSegmentInfos) {
           // Should not be necessary: no prior commit should
           // have left pending files, so just defensive:
           deleter.clearPendingFiles();
-          doAfterFlushRamSegments(mergeFlag);
+          maybeApplyDeletes(doMerge);
+          doAfterFlush();
         }

         if (!inTransaction) {
@@ -1446,7 +1543,7 @@ public class IndexWriter {

           // Must rollback so our state matches index:

-          if (sourceSegments == ramSegmentInfos && onlyRamDocsToFlush()) {
+          if (sourceSegments == ramSegmentInfos && 0 == bufferedDeleteTerms.size()) {
             // Simple case: newSegment may or may not have
             // been added to the end of our segment infos,
             // so just check & remove if so:
@@ -1476,21 +1573,21 @@ public class IndexWriter {
         }
       }
     } finally {
       // close readers before we attempt to delete now-obsolete segments
-      if (mergeFlag) merger.closeReaders();
+      if (doMerge) merger.closeReaders();
     }

     if (!inTransaction) {
       // Attempt to delete all files we just obsoleted:
       deleter.deleteFile(segmentsInfosFileName);    // delete old segments_N file
       deleter.deleteSegments(segmentsToDelete);     // delete now-unused segments
-                                                    // including the old del files
+                                                    // Includes the old del files
       deleter.commitPendingFiles();
     } else {
       deleter.addPendingFile(segmentsInfosFileName);    // delete old segments_N file
       deleter.deleteSegments(segmentsToDelete, protectedSegments);    // delete now-unused segments
     }

-    if (useCompoundFile && mergeFlag) {
+    if (useCompoundFile && doMerge) {
       segmentsInfosFileName = nextSegmentsFileName;
       nextSegmentsFileName = segmentInfos.getNextSegmentFileName();
@@ -1531,6 +1628,58 @@ public class IndexWriter {
     return mergedDocCount;
   }

+  // Called during flush to apply any buffered deletes. If
+  // doMerge is true then a new segment was just created and
+  // flushed from the ram segments.
+  private final void maybeApplyDeletes(boolean doMerge) throws IOException {
+
+    if (bufferedDeleteTerms.size() > 0) {
+      if (infoStream != null)
+        infoStream.println("flush " + numBufferedDeleteTerms + " buffered deleted terms on "
+                           + segmentInfos.size() + " segments.");
+
+      if (doMerge) {
+        IndexReader reader = null;
+        try {
+          reader = SegmentReader.get(segmentInfos.info(segmentInfos.size() - 1));
+          reader.setDeleter(deleter);
+
+          // Apply delete terms to the segment just flushed from ram
+          // apply appropriately so that a delete term is only applied to
+          // the documents buffered before it, not those buffered after it.
+          applyDeletesSelectively(bufferedDeleteTerms, reader);
+        } finally {
+          if (reader != null)
+            reader.close();
+        }
+      }
+
+      int infosEnd = segmentInfos.size();
+      if (doMerge) {
+        infosEnd--;
+      }
+
+      for (int i = 0; i < infosEnd; i++) {
+        IndexReader reader = null;
+        try {
+          reader = SegmentReader.get(segmentInfos.info(i));
+          reader.setDeleter(deleter);
+
+          // Apply delete terms to disk segments
+          // except the one just flushed from ram.
+          applyDeletes(bufferedDeleteTerms, reader);
+        } finally {
+          if (reader != null)
+            reader.close();
+        }
+      }
+
+      // Clean up bufferedDeleteTerms.
+      bufferedDeleteTerms.clear();
+      numBufferedDeleteTerms = 0;
+    }
+  }
+
   private final boolean checkNonDecreasingLevels(int start) {
     int lowerBound = -1;
     int upperBound = minMergeDocs;
@@ -1548,4 +1697,83 @@ public class IndexWriter {
       }
     }
     return true;
   }
+
+  // For test purposes.
+  final synchronized int getBufferedDeleteTermsSize() {
+    return bufferedDeleteTerms.size();
+  }
+
+  // For test purposes.
+  final synchronized int getNumBufferedDeleteTerms() {
+    return numBufferedDeleteTerms;
+  }
+
+  // Number of ram segments a delete term applies to.
+  private class Num {
+    private int num;
+
+    Num(int num) {
+      this.num = num;
+    }
+
+    int getNum() {
+      return num;
+    }
+
+    void setNum(int num) {
+      this.num = num;
+    }
+  }
+
+  // Buffer a term in bufferedDeleteTerms, which records the
+  // current number of documents buffered in ram so that the
+  // delete term will be applied to those ram segments as
+  // well as the disk segments.
+  private void bufferDeleteTerm(Term term) {
+    Num num = (Num) bufferedDeleteTerms.get(term);
+    if (num == null) {
+      bufferedDeleteTerms.put(term, new Num(ramSegmentInfos.size()));
+    } else {
+      num.setNum(ramSegmentInfos.size());
+    }
+    numBufferedDeleteTerms++;
+  }
+
+  // Apply buffered delete terms to the segment just flushed from ram
+  // apply appropriately so that a delete term is only applied to
+  // the documents buffered before it, not those buffered after it.
+  private final void applyDeletesSelectively(HashMap deleteTerms,
+      IndexReader reader) throws IOException {
+    Iterator iter = deleteTerms.entrySet().iterator();
+    while (iter.hasNext()) {
+      Entry entry = (Entry) iter.next();
+      Term term = (Term) entry.getKey();
+
+      TermDocs docs = reader.termDocs(term);
+      if (docs != null) {
+        int num = ((Num) entry.getValue()).getNum();
+        try {
+          while (docs.next()) {
+            int doc = docs.doc();
+            if (doc >= num) {
+              break;
+            }
+            reader.deleteDocument(doc);
+          }
+        } finally {
+          docs.close();
+        }
+      }
+    }
+  }
+
+  // Apply buffered delete terms to this reader.
+  private final void applyDeletes(HashMap deleteTerms, IndexReader reader)
+      throws IOException {
+    Iterator iter = deleteTerms.entrySet().iterator();
+    while (iter.hasNext()) {
+      Entry entry = (Entry) iter.next();
+      reader.deleteDocuments((Term) entry.getKey());
+    }
+  }
 }
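
The bufferDeleteTerm / applyDeletesSelectively pair above enforces that a buffered delete term is applied only to documents that were buffered before it (plus all on-disk segments), which is what makes updateDocument safe. The following stand-alone toy model, not Lucene code, illustrates that bookkeeping with plain strings; the cutoff stored per delete plays the role of the Num value:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class SelectiveDeleteSketch {
  private final List<String> ramDocs = new ArrayList<String>();   // buffered doc keys, in arrival order
  private final Map<String, Integer> bufferedDeletes = new HashMap<String, Integer>();

  void addDocument(String key) {
    ramDocs.add(key);
  }

  void deleteDocuments(String key) {
    // Record how many ram docs were buffered before this delete arrived.
    bufferedDeletes.put(key, Integer.valueOf(ramDocs.size()));
  }

  List<String> flush() {
    List<String> survivors = new ArrayList<String>();
    for (int i = 0; i < ramDocs.size(); i++) {
      String key = ramDocs.get(i);
      Integer cutoff = bufferedDeletes.get(key);
      // A doc is removed only if a matching delete was buffered *after* it.
      if (cutoff == null || i >= cutoff.intValue()) {
        survivors.add(key);
      }
    }
    ramDocs.clear();
    bufferedDeletes.clear();
    return survivors;
  }

  public static void main(String[] args) {
    SelectiveDeleteSketch w = new SelectiveDeleteSketch();
    w.addDocument("id:1");        // buffered before the delete -> removed at flush
    w.deleteDocuments("id:1");    // cutoff recorded as 1
    w.addDocument("id:1");        // buffered after the delete  -> survives
    System.out.println(w.flush());  // prints [id:1]: only the second version remains
  }
}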

NewIndexModifier.java (deleted)

@@ -1,294 +0,0 @@
package org.apache.lucene.index;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map.Entry;
/**
* NewIndexModifier extends {@link IndexWriter} so that you can not only insert
* documents but also delete documents through a single interface. Internally,
* inserts and deletes are buffered before they are flushed to disk.
* <p>
* Design Overview
* <p>
* deleteDocuments() method works by buffering terms to be deleted. Deletes are
* deferred until ram is flushed to disk, either because enough new documents or
* delete terms are buffered, or because close() or flush() is called. Using
* Java synchronization, care is taken to ensure that an interleaved sequence of
* inserts and deletes for the same document are properly serialized.
*/
public class NewIndexModifier extends IndexWriter {
// number of ram segments a delete term applies to
private class Num {
private int num;
Num(int num) {
this.num = num;
}
int getNum() {
return num;
}
void setNum(int num) {
this.num = num;
}
}
/**
* Default value is 10. Change using {@link #setMaxBufferedDeleteTerms(int)}.
*/
public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = 10;
// the max number of delete terms that can be buffered before
// they must be flushed to disk
private int maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
// to buffer delete terms in ram before they are applied
// key is delete term, value is number of ram segments the term applies to
private HashMap bufferedDeleteTerms = new HashMap();
private int numBufferedDeleteTerms = 0;
/**
* @see IndexWriter#IndexWriter(String, Analyzer, boolean)
*/
public NewIndexModifier(String path, Analyzer a, boolean create)
throws IOException {
super(path, a, create);
}
/**
* @see IndexWriter#IndexWriter(File, Analyzer, boolean)
*/
public NewIndexModifier(File path, Analyzer a, boolean create)
throws IOException {
super(path, a, create);
}
/**
* @see IndexWriter#IndexWriter(Directory, Analyzer, boolean)
*/
public NewIndexModifier(Directory d, Analyzer a, boolean create)
throws IOException {
super(d, a, create);
}
/**
* @see IndexWriter#IndexWriter(String, Analyzer)
*/
public NewIndexModifier(String path, Analyzer a) throws IOException {
super(path, a);
}
/**
* @see IndexWriter#IndexWriter(File, Analyzer)
*/
public NewIndexModifier(File path, Analyzer a) throws IOException {
super(path, a);
}
/**
* @see IndexWriter#IndexWriter(Directory, Analyzer)
*/
public NewIndexModifier(Directory d, Analyzer a) throws IOException {
super(d, a);
}
/**
* Determines the minimal number of delete terms required before the buffered
* in-memory delete terms are applied and flushed. If there are documents
* buffered in memory at the time, they are merged and a new Segment is
* created. The delete terms are applied appropriately.
* <p>
* The default value is 10.
* @throws IllegalArgumentException if maxBufferedDeleteTerms is smaller than
* 1
*/
public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
if (maxBufferedDeleteTerms < 1)
throw new IllegalArgumentException("maxBufferedDeleteTerms must at least be 1");
this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
}
/**
* @see #setMaxBufferedDeleteTerms
*/
public int getMaxBufferedDeleteTerms() {
return maxBufferedDeleteTerms;
}
// for test purpose
final synchronized int getBufferedDeleteTermsSize() {
return bufferedDeleteTerms.size();
}
// for test purpose
final synchronized int getNumBufferedDeleteTerms() {
return numBufferedDeleteTerms;
}
/**
* Updates a document by first deleting all documents containing
* <code>term</code> and then adding the new document.
*/
public void updateDocument(Term term, Document doc) throws IOException {
updateDocument(term, doc, getAnalyzer());
}
/**
* Updates a document by first deleting all documents containing
* <code>term</code> and then adding the new document.
*/
public void updateDocument(Term term, Document doc, Analyzer analyzer)
throws IOException {
SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer);
synchronized (this) {
bufferDeleteTerm(term);
ramSegmentInfos.addElement(newSegmentInfo);
maybeFlushRamSegments();
}
}
/**
* Deletes all documents containing <code>term</code>.
*/
public synchronized void deleteDocuments(Term term) throws IOException {
bufferDeleteTerm(term);
maybeFlushRamSegments();
}
/**
* Deletes all documents containing any of the terms. All deletes are flushed
* at the same time.
*/
public synchronized void deleteDocuments(Term[] terms) throws IOException {
for (int i = 0; i < terms.length; i++) {
bufferDeleteTerm(terms[i]);
}
maybeFlushRamSegments();
}
// buffer a term in bufferedDeleteTerms. bufferedDeleteTerms also records
// the current number of documents buffered in ram so that the delete term
// will be applied to those ram segments as well as the disk segments
private void bufferDeleteTerm(Term term) {
Num num = (Num)bufferedDeleteTerms.get(term);
if (num == null) {
bufferedDeleteTerms.put(term, new Num(getRAMSegmentCount()));
} else {
num.setNum(getRAMSegmentCount());
}
numBufferedDeleteTerms++;
}
// a flush is triggered if enough new documents are buffered or
// if enough delete terms are buffered
protected boolean timeToFlushRam() {
return super.timeToFlushRam()
|| numBufferedDeleteTerms >= maxBufferedDeleteTerms;
}
protected boolean anythingToFlushRam() {
return super.anythingToFlushRam() || bufferedDeleteTerms.size() > 0;
}
protected boolean onlyRamDocsToFlush() {
return super.onlyRamDocsToFlush() && bufferedDeleteTerms.size() == 0;
}
protected void doAfterFlushRamSegments(boolean flushedRamSegments)
throws IOException {
if (bufferedDeleteTerms.size() > 0) {
if (getInfoStream() != null)
getInfoStream().println(
"flush " + numBufferedDeleteTerms + " buffered terms on "
+ segmentInfos.size() + " segments.");
if (flushedRamSegments) {
IndexReader reader = null;
try {
reader = SegmentReader.get(segmentInfos.info(segmentInfos.size() - 1));
reader.setDeleter(getDeleter());
// apply delete terms to the segment just flushed from ram
// apply appropriately so that a delete term is only applied to
// the documents buffered before it, not those buffered after it
applyDeletesSelectively(bufferedDeleteTerms, reader);
} finally {
if (reader != null)
reader.close();
}
}
int infosEnd = segmentInfos.size();
if (flushedRamSegments) {
infosEnd--;
}
for (int i = 0; i < infosEnd; i++) {
IndexReader reader = null;
try {
reader = SegmentReader.get(segmentInfos.info(i));
reader.setDeleter(getDeleter());
// apply delete terms to disk segments
// except the one just flushed from ram
applyDeletes(bufferedDeleteTerms, reader);
} finally {
if (reader != null)
reader.close();
}
}
// clean up bufferedDeleteTerms
bufferedDeleteTerms.clear();
numBufferedDeleteTerms = 0;
}
}
// apply buffered delete terms to the segment just flushed from ram
// apply appropriately so that a delete term is only applied to
// the documents buffered before it, not those buffered after it
private final void applyDeletesSelectively(HashMap deleteTerms,
IndexReader reader) throws IOException {
Iterator iter = deleteTerms.entrySet().iterator();
while (iter.hasNext()) {
Entry entry = (Entry)iter.next();
Term term = (Term)entry.getKey();
TermDocs docs = reader.termDocs(term);
if (docs != null) {
int num = ((Num)entry.getValue()).getNum();
try {
while (docs.next()) {
int doc = docs.doc();
if (doc >= num) {
break;
}
reader.deleteDocument(doc);
}
} finally {
docs.close();
}
}
}
}
// apply buffered delete terms to disk segments
// except the one just flushed from ram
private final void applyDeletes(HashMap deleteTerms, IndexReader reader)
throws IOException {
Iterator iter = deleteTerms.entrySet().iterator();
while (iter.hasNext()) {
Entry entry = (Entry)iter.next();
Term term = (Term)entry.getKey();
reader.deleteDocuments(term);
}
}
}

TestIndexWriterDelete.java (renamed from TestNewIndexModifierDelete.java)

@@ -15,7 +15,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockRAMDirectory;
 import org.apache.lucene.store.RAMDirectory;

-public class TestNewIndexModifierDelete extends TestCase {
+public class TestIndexWriterDelete extends TestCase {

   // test the simple case
   public void testSimpleCase() throws IOException {
@@ -26,7 +26,7 @@ public class TestNewIndexModifierDelete extends TestCase {
     String[] text = { "Amsterdam", "Venice" };

     Directory dir = new RAMDirectory();
-    NewIndexModifier modifier = new NewIndexModifier(dir,
+    IndexWriter modifier = new IndexWriter(dir,
         new WhitespaceAnalyzer(), true);
     modifier.setUseCompoundFile(true);
     modifier.setMaxBufferedDeleteTerms(1);
@@ -59,7 +59,7 @@ public class TestNewIndexModifierDelete extends TestCase {
   // test when delete terms only apply to disk segments
   public void testNonRAMDelete() throws IOException {
     Directory dir = new RAMDirectory();
-    NewIndexModifier modifier = new NewIndexModifier(dir,
+    IndexWriter modifier = new IndexWriter(dir,
         new WhitespaceAnalyzer(), true);
     modifier.setMaxBufferedDocs(2);
     modifier.setMaxBufferedDeleteTerms(2);
@@ -72,7 +72,7 @@ public class TestNewIndexModifierDelete extends TestCase {
     }
     modifier.flush();

-    assertEquals(0, modifier.getRAMSegmentCount());
+    assertEquals(0, modifier.getRamSegmentCount());
     assertTrue(0 < modifier.getSegmentCount());

     IndexReader reader = IndexReader.open(dir);
@@ -92,7 +92,7 @@ public class TestNewIndexModifierDelete extends TestCase {
   // test when delete terms only apply to ram segments
   public void testRAMDeletes() throws IOException {
     Directory dir = new RAMDirectory();
-    NewIndexModifier modifier = new NewIndexModifier(dir,
+    IndexWriter modifier = new IndexWriter(dir,
         new WhitespaceAnalyzer(), true);
     modifier.setMaxBufferedDocs(4);
     modifier.setMaxBufferedDeleteTerms(4);
@@ -125,7 +125,7 @@ public class TestNewIndexModifierDelete extends TestCase {
   // test when delete terms apply to both disk and ram segments
   public void testBothDeletes() throws IOException {
     Directory dir = new RAMDirectory();
-    NewIndexModifier modifier = new NewIndexModifier(dir,
+    IndexWriter modifier = new IndexWriter(dir,
         new WhitespaceAnalyzer(), true);
     modifier.setMaxBufferedDocs(100);
     modifier.setMaxBufferedDeleteTerms(100);
@@ -158,7 +158,7 @@ public class TestNewIndexModifierDelete extends TestCase {
   // test that batched delete terms are flushed together
   public void testBatchDeletes() throws IOException {
     Directory dir = new RAMDirectory();
-    NewIndexModifier modifier = new NewIndexModifier(dir,
+    IndexWriter modifier = new IndexWriter(dir,
         new WhitespaceAnalyzer(), true);
     modifier.setMaxBufferedDocs(2);
     modifier.setMaxBufferedDeleteTerms(2);
@@ -196,7 +196,7 @@ public class TestNewIndexModifierDelete extends TestCase {
     modifier.close();
   }

-  private void addDoc(NewIndexModifier modifier, int id, int value)
+  private void addDoc(IndexWriter modifier, int id, int value)
       throws IOException {
     Document doc = new Document();
     doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
@@ -257,7 +257,7 @@ public class TestNewIndexModifierDelete extends TestCase {
     // Iterate w/ ever increasing free disk space:
     while (!done) {
       MockRAMDirectory dir = new MockRAMDirectory(startDir);
-      NewIndexModifier modifier = new NewIndexModifier(dir,
+      IndexWriter modifier = new IndexWriter(dir,
           new WhitespaceAnalyzer(), false);

       modifier.setMaxBufferedDocs(1000); // use flush or close
@@ -314,11 +314,9 @@ public class TestNewIndexModifierDelete extends TestCase {
                        Field.Index.UN_TOKENIZED));
             d.add(new Field("content", "bbb " + i, Field.Store.NO,
                        Field.Index.TOKENIZED));
-            modifier.updateDocument(
-                new Term("id", Integer.toString(docId)), d);
+            modifier.updateDocument(new Term("id", Integer.toString(docId)), d);
           } else { // deletes
-            modifier
-                .deleteDocuments(new Term("id", Integer.toString(docId)));
+            modifier.deleteDocuments(new Term("id", Integer.toString(docId)));
             // modifier.setNorm(docId, "contents", (float)2.0);
           }
           docId += 12;

TestIndexWriterMergePolicy.java

@@ -40,7 +40,7 @@ public class TestIndexWriterMergePolicy extends TestCase {
     for (int i = 0; i < 100; i++) {
       addDoc(writer);
       checkInvariants(writer);
-      if (writer.getRAMSegmentCount() + writer.getSegmentCount() >= 18) {
+      if (writer.getRamSegmentCount() + writer.getSegmentCount() >= 18) {
         noOverMerge = true;
       }
     }
@@ -178,7 +178,7 @@ public class TestIndexWriterMergePolicy extends TestCase {
     int mergeFactor = writer.getMergeFactor();
     int maxMergeDocs = writer.getMaxMergeDocs();

-    int ramSegmentCount = writer.getRAMSegmentCount();
+    int ramSegmentCount = writer.getRamSegmentCount();
     assertTrue(ramSegmentCount < maxBufferedDocs);

     int lowerBound = -1;