Keep track of the number of buffered documents: LUCENE-388

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@432125 13f79535-47bb-0310-9956-ffa450edef68
Yonik Seeley 2006-08-17 02:52:21 +00:00
parent d516bf50d8
commit 8e3608845d
2 changed files with 73 additions and 15 deletions

CHANGES.txt

@@ -121,6 +121,10 @@ Optimizations
    during segment merges (e.g. during indexing or optimizing), thus improving
    performance . (Michael Busch via Otis Gospodnetic)
 
+ 4. LUCENE-388: Improve indexing performance when maxBufferedDocs is large by
+    keeping a count of buffered documents rather than counting after each
+    document addition. (Doron Cohen, Paul Smith, Yonik Seeley)
+
 Release 2.0.0 2006-05-26
 
 API Changes
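
The entry above is the whole story of the patch: with a large maxBufferedDocs, counting the buffered documents after every addDocument() call costs O(n) per add, i.e. O(n^2) over a buffer of n documents, while a running counter makes it O(1). A minimal sketch of the idea, using a toy List<Integer> of per-segment doc counts instead of Lucene's SegmentInfos (names and structure are illustrative, not Lucene's actual API):

```java
import java.util.ArrayList;
import java.util.List;

// Toy model of the LUCENE-388 idea: each added document becomes a 1-doc
// RAM segment, and the writer keeps a running counter of those trailing
// 1-doc segments instead of re-scanning the segment list on every add.
class BufferedDocCounter {
  private final List<Integer> segmentDocCounts = new ArrayList<Integer>();
  private int singleDocSegmentsCount = 0; // trailing 1-doc (buffered) segments

  void addDocument() {
    segmentDocCounts.add(1);  // a new 1-doc segment per added document
    singleDocSegmentsCount++; // O(1) bookkeeping replaces the scan below
  }

  // The pre-patch way: walk backwards over the trailing 1-doc segments.
  // O(n) per call, hence O(n^2) over a buffer of n documents.
  int countBufferedDocsByScanning() {
    int count = 0;
    for (int i = segmentDocCounts.size() - 1;
         i >= 0 && segmentDocCounts.get(i) == 1; i--) {
      count++;
    }
    return count;
  }

  // The post-patch way: O(1).
  int countBufferedDocs() {
    return singleDocSegmentsCount;
  }
}
```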

src/java/org/apache/lucene/index/IndexWriter.java

@@ -110,6 +110,7 @@ public class IndexWriter {
   private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
   private final Directory ramDirectory = new RAMDirectory(); // for temp segs
+  private int singleDocSegmentsCount = 0; // for speeding decision on merge candidates
 
   private Lock writeLock;
   private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
@@ -433,6 +434,7 @@ public class IndexWriter {
   /** Flushes all changes to an index and closes all associated files. */
   public synchronized void close() throws IOException {
     flushRamSegments();
+    // testInvariants();
     ramDirectory.close();
     if (writeLock != null) {
       writeLock.release(); // release write lock
@@ -509,8 +511,10 @@ public class IndexWriter {
     dw.addDocument(segmentName, doc);
     synchronized (this) {
       segmentInfos.addElement(new SegmentInfo(segmentName, 1, ramDirectory));
+      singleDocSegmentsCount++;
       maybeMergeSegments();
     }
+    // testInvariants();
   }
 
   final int getSegmentsCounter(){
@@ -575,6 +579,7 @@ public class IndexWriter {
       int minSegment = segmentInfos.size() - mergeFactor;
       mergeSegments(minSegment < 0 ? 0 : minSegment);
     }
+    // testInvariants();
   }
 
   /** Merges all segments from an array of indexes into this index.
@@ -610,6 +615,7 @@ public class IndexWriter {
     }
     optimize(); // final cleanup
+    // testInvariants();
   }
 
   /** Merges the provided indexes into this index.
@@ -669,6 +675,8 @@ public class IndexWriter {
       // delete now unused files of segment
       deleteFiles(filesToDelete);
     }
+
+    // testInvariants();
   }
 
   /** Merges all RAM-resident segments. */
@ -694,8 +702,8 @@ public class IndexWriter {
long targetMergeDocs = minMergeDocs; long targetMergeDocs = minMergeDocs;
while (targetMergeDocs <= maxMergeDocs) { while (targetMergeDocs <= maxMergeDocs) {
// find segments smaller than current target size // find segments smaller than current target size
int minSegment = segmentInfos.size(); int minSegment = segmentInfos.size() - singleDocSegmentsCount; // top 1-doc segments are taken for sure
int mergeDocs = 0; int mergeDocs = singleDocSegmentsCount;
while (--minSegment >= 0) { while (--minSegment >= 0) {
SegmentInfo si = segmentInfos.info(minSegment); SegmentInfo si = segmentInfos.info(minSegment);
if (si.docCount >= targetMergeDocs) if (si.docCount >= targetMergeDocs)
@@ -703,10 +711,12 @@ public class IndexWriter {
         mergeDocs += si.docCount;
       }
 
-      if (mergeDocs >= targetMergeDocs)   // found a merge to do
+      if (mergeDocs >= targetMergeDocs) { // found a merge to do
         mergeSegments(minSegment+1);
-      else
+        singleDocSegmentsCount = 0;
+      } else {
         break;
+      }
 
       targetMergeDocs *= mergeFactor; // increase target size
     }
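
The two hunks above are the heart of the change. Below is a hedged restatement of the patched maybeMergeSegments() selection loop, modeling per-segment doc counts as an int[] and stubbing the merge itself to a println (the real method mutates segmentInfos and repeats with the merged segment in place). It shows how singleDocSegmentsCount pre-seeds the scan so the trailing 1-doc segments are never re-walked; the class itself is illustrative, not Lucene code:

```java
public class MaybeMergeSketch {

  static void maybeMergeSegments(int[] docCounts, int singleDocSegmentsCount,
                                 int minMergeDocs, int maxMergeDocs, int mergeFactor) {
    long targetMergeDocs = minMergeDocs;
    while (targetMergeDocs <= maxMergeDocs) {
      // The trailing 1-doc buffered segments are merge candidates for sure,
      // so start the scan just below them and pre-seed the doc count
      // instead of walking them one by one (the LUCENE-388 change).
      int minSegment = docCounts.length - singleDocSegmentsCount;
      int mergeDocs = singleDocSegmentsCount;
      while (--minSegment >= 0) {
        if (docCounts[minSegment] >= targetMergeDocs)
          break; // segment already at or above this level
        mergeDocs += docCounts[minSegment];
      }
      if (mergeDocs >= targetMergeDocs) { // found a merge to do
        System.out.println("would merge segments " + (minSegment + 1)
            + ".." + (docCounts.length - 1) + " (" + mergeDocs + " docs)");
        singleDocSegmentsCount = 0; // the 1-doc segments are gone after a merge
      } else {
        break;
      }
      targetMergeDocs *= mergeFactor; // increase target size
    }
  }

  public static void main(String[] args) {
    // A 100-doc segment followed by ten freshly buffered 1-doc segments,
    // with minMergeDocs=10 and mergeFactor=10: exactly one merge triggers.
    int[] docCounts = {100, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
    maybeMergeSegments(docCounts, 10, 10, Integer.MAX_VALUE, 10);
  }
}
```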
@@ -781,6 +791,50 @@ public class IndexWriter {
     }
   }
 
+  /***
+  private synchronized void testInvariants() {
+    // index segments should decrease in size
+    int maxSegLevel = 0;
+    for (int i=segmentInfos.size()-1; i>=0; i--) {
+      SegmentInfo si = segmentInfos.info(i);
+      int segLevel = (si.docCount)/minMergeDocs;
+      if (segLevel < maxSegLevel) {
+        throw new RuntimeException("Segment #" + i + " is too small. " + segInfo());
+      }
+      maxSegLevel = Math.max(maxSegLevel,segLevel);
+    }
+
+    // check if merges needed
+    long targetMergeDocs = minMergeDocs;
+    int minSegment = segmentInfos.size();
+
+    while (targetMergeDocs <= maxMergeDocs && minSegment>=0) {
+      int mergeDocs = 0;
+      while (--minSegment >= 0) {
+        SegmentInfo si = segmentInfos.info(minSegment);
+        if (si.docCount >= targetMergeDocs) break;
+        mergeDocs += si.docCount;
+      }
+
+      if (mergeDocs >= targetMergeDocs) {
+        throw new RuntimeException("Merge needed at level "+targetMergeDocs + " :"+segInfo());
+      }
+
+      targetMergeDocs *= mergeFactor; // increase target size
+    }
+  }
+
+  private String segInfo() {
+    StringBuffer sb = new StringBuffer("minMergeDocs="+minMergeDocs+" singleDocSegmentsCount="+singleDocSegmentsCount+" segsizes:");
+    for (int i=0; i<segmentInfos.size(); i++) {
+      sb.append(segmentInfos.info(i).docCount);
+      sb.append(",");
+    }
+    return sb.toString();
+  }
+  ***/
+
   /*
    * Some operating systems (e.g. Windows) don't permit a file to be deleted
    * while it is opened for read (e.g. by another process or thread). So we
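
The commented-out testInvariants() above encodes two checks: walking from the newest segment backwards, the merge level (docCount / minMergeDocs) must never shrink, and no merge may still be pending at any level. A standalone restatement of the first check, assuming a plain int[] of doc counts rather than SegmentInfos (class and method names are hypothetical):

```java
public class SegmentInvariant {

  // Segment sizes must not increase toward the newer end of the index:
  // scanning from the newest segment backwards, each older segment's
  // level (docCount / minMergeDocs) must be at least the maximum level
  // seen among the newer segments.
  static void assertSegmentSizesDecrease(int[] docCounts, int minMergeDocs) {
    int maxSegLevel = 0;
    for (int i = docCounts.length - 1; i >= 0; i--) {
      int segLevel = docCounts[i] / minMergeDocs; // merge "level" of segment i
      if (segLevel < maxSegLevel) {
        throw new IllegalStateException("segment #" + i + " is too small");
      }
      maxSegLevel = Math.max(maxSegLevel, segLevel);
    }
  }

  public static void main(String[] args) {
    assertSegmentSizesDecrease(new int[] {100, 10, 1}, 10); // ok: sizes decrease
    assertSegmentSizesDecrease(new int[] {1, 100}, 10);     // throws: oldest segment too small
  }
}
```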