Optimize IndexWriter.addIndexes(Directory[]).

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@179611 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doug Cutting 2005-06-02 17:05:58 +00:00
parent eea1c63a62
commit 098a0e95b8
2 changed files with 28 additions and 4 deletions

View File

@ -113,7 +113,6 @@ New features
fields in arbitrarily formats can be cached as ints and floats.
(Doug Cutting)
API Changes
1. Several methods and fields have been deprecated. The API documentation
@ -169,7 +168,6 @@ Bug fixes
corrupted when the old version of a file was longer than the new.
Now any existing file is first removed. (Doug Cutting)
Optimizations
1. Disk usage (peak requirements during indexing and optimization)
@ -210,6 +208,11 @@ Optimizations
term index is now read into memory lazily at the first
random-access. (Doug Cutting)
9. Optimize IndexWriter.addIndexes(Directory[]) when the number of
added indexes is larger than mergeFactor. Previously this could
result in quadratic performance. Now performance is n log(n).
(Doug Cutting)
Infrastructure
1. Lucene's source code repository has converted from CVS to

View File

@ -547,6 +547,9 @@ public class IndexWriter {
public synchronized void addIndexes(Directory[] dirs)
throws IOException {
optimize(); // start with zero or 1 seg
int start = segmentInfos.size();
for (int i = 0; i < dirs.length; i++) {
SegmentInfos sis = new SegmentInfos(); // read infos from dir
sis.read(dirs[i]);
@ -554,6 +557,16 @@ public class IndexWriter {
segmentInfos.addElement(sis.info(j)); // add each info
}
}
// merge newly added segments in log(n) passes
while (segmentInfos.size() > start+mergeFactor) {
for (int base = start+1; base < segmentInfos.size(); base++) {
int end = Math.min(segmentInfos.size(), base+mergeFactor);
if (end-base > 1)
mergeSegments(base, end);
}
}
optimize(); // final cleanup
}
@ -659,12 +672,19 @@ public class IndexWriter {
and pushes the merged index onto the top of the segmentInfos stack. */
private final void mergeSegments(int minSegment)
throws IOException {
mergeSegments(minSegment, segmentInfos.size());
}
/** Merges the named range of segments, replacing them in the stack with a
* single segment. */
private final void mergeSegments(int minSegment, int end)
throws IOException {
final String mergedName = newSegmentName();
if (infoStream != null) infoStream.print("merging segments");
SegmentMerger merger = new SegmentMerger(this, mergedName);
final Vector segmentsToDelete = new Vector();
for (int i = minSegment; i < segmentInfos.size(); i++) {
for (int i = minSegment; i < end; i++) {
SegmentInfo si = segmentInfos.info(i);
if (infoStream != null)
infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
@ -681,7 +701,8 @@ public class IndexWriter {
infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
}
segmentInfos.setSize(minSegment); // pop old infos & add new
for (int i = end-1; i >= minSegment; i--) // remove old infos & add new
segmentInfos.remove(i);
segmentInfos.addElement(new SegmentInfo(mergedName, mergedDocCount,
directory));