Optimize IndexWriter.addIndexes(Directory[]).

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@179611 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doug Cutting 2005-06-02 17:05:58 +00:00
parent eea1c63a62
commit 098a0e95b8
2 changed files with 28 additions and 4 deletions

View File

@ -113,7 +113,6 @@ New features
fields in arbitrary formats can be cached as ints and floats. fields in arbitrary formats can be cached as ints and floats.
(Doug Cutting) (Doug Cutting)
API Changes API Changes
1. Several methods and fields have been deprecated. The API documentation 1. Several methods and fields have been deprecated. The API documentation
@ -169,7 +168,6 @@ Bug fixes
corrupted when the old version of a file was longer than the new. corrupted when the old version of a file was longer than the new.
Now any existing file is first removed. (Doug Cutting) Now any existing file is first removed. (Doug Cutting)
Optimizations Optimizations
1. Disk usage (peak requirements during indexing and optimization) 1. Disk usage (peak requirements during indexing and optimization)
@ -210,6 +208,11 @@ Optimizations
term index is now read into memory lazily at the first term index is now read into memory lazily at the first
random-access. (Doug Cutting) random-access. (Doug Cutting)
9. Optimize IndexWriter.addIndexes(Directory[]) when the number of
added indexes is larger than mergeFactor. Previously this could
result in quadratic performance. Now performance is O(n log(n)).
(Doug Cutting)
Infrastructure Infrastructure
1. Lucene's source code repository has been converted from CVS to 1. Lucene's source code repository has been converted from CVS to

View File

@ -547,6 +547,9 @@ public class IndexWriter {
public synchronized void addIndexes(Directory[] dirs) public synchronized void addIndexes(Directory[] dirs)
throws IOException { throws IOException {
optimize(); // start with zero or 1 seg optimize(); // start with zero or 1 seg
int start = segmentInfos.size();
for (int i = 0; i < dirs.length; i++) { for (int i = 0; i < dirs.length; i++) {
SegmentInfos sis = new SegmentInfos(); // read infos from dir SegmentInfos sis = new SegmentInfos(); // read infos from dir
sis.read(dirs[i]); sis.read(dirs[i]);
@ -554,6 +557,16 @@ public class IndexWriter {
segmentInfos.addElement(sis.info(j)); // add each info segmentInfos.addElement(sis.info(j)); // add each info
} }
} }
// merge newly added segments in log(n) passes
while (segmentInfos.size() > start+mergeFactor) {
for (int base = start+1; base < segmentInfos.size(); base++) {
int end = Math.min(segmentInfos.size(), base+mergeFactor);
if (end-base > 1)
mergeSegments(base, end);
}
}
optimize(); // final cleanup optimize(); // final cleanup
} }
@ -659,12 +672,19 @@ public class IndexWriter {
and pushes the merged index onto the top of the segmentInfos stack. */ and pushes the merged index onto the top of the segmentInfos stack. */
private final void mergeSegments(int minSegment) private final void mergeSegments(int minSegment)
throws IOException { throws IOException {
mergeSegments(minSegment, segmentInfos.size());
}
/** Merges the named range of segments, replacing them in the stack with a
* single segment. */
private final void mergeSegments(int minSegment, int end)
throws IOException {
final String mergedName = newSegmentName(); final String mergedName = newSegmentName();
if (infoStream != null) infoStream.print("merging segments"); if (infoStream != null) infoStream.print("merging segments");
SegmentMerger merger = new SegmentMerger(this, mergedName); SegmentMerger merger = new SegmentMerger(this, mergedName);
final Vector segmentsToDelete = new Vector(); final Vector segmentsToDelete = new Vector();
for (int i = minSegment; i < segmentInfos.size(); i++) { for (int i = minSegment; i < end; i++) {
SegmentInfo si = segmentInfos.info(i); SegmentInfo si = segmentInfos.info(i);
if (infoStream != null) if (infoStream != null)
infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
@ -681,7 +701,8 @@ public class IndexWriter {
infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)"); infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
} }
segmentInfos.setSize(minSegment); // pop old infos & add new for (int i = end-1; i >= minSegment; i--) // remove old infos & add new
segmentInfos.remove(i);
segmentInfos.addElement(new SegmentInfo(mergedName, mergedDocCount, segmentInfos.addElement(new SegmentInfo(mergedName, mergedDocCount,
directory)); directory));