mirror of https://github.com/apache/lucene.git
Optimize IndexWriter.addIndexes(Directory[]).
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@179611 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
eea1c63a62
commit
098a0e95b8
|
@ -113,7 +113,6 @@ New features
|
|||
fields in arbitrarily formats can be cached as ints and floats.
|
||||
(Doug Cutting)
|
||||
|
||||
|
||||
API Changes
|
||||
|
||||
1. Several methods and fields have been deprecated. The API documentation
|
||||
|
@ -169,7 +168,6 @@ Bug fixes
|
|||
corrupted when the old version of a file was longer than the new.
|
||||
Now any existing file is first removed. (Doug Cutting)
|
||||
|
||||
|
||||
Optimizations
|
||||
|
||||
1. Disk usage (peak requirements during indexing and optimization)
|
||||
|
@ -210,6 +208,11 @@ Optimizations
|
|||
term index is now read into memory lazily at the first
|
||||
random-access. (Doug Cutting)
|
||||
|
||||
9. Optimize IndexWriter.addIndexes(Directory[]) when the number of
|
||||
added indexes is larger than mergeFactor. Previously this could
|
||||
result in quadratic performance. Now performance is n log(n).
|
||||
(Doug Cutting)
|
||||
|
||||
Infrastructure
|
||||
|
||||
1. Lucene's source code repository has converted from CVS to
|
||||
|
|
|
@ -547,6 +547,9 @@ public class IndexWriter {
|
|||
public synchronized void addIndexes(Directory[] dirs)
|
||||
throws IOException {
|
||||
optimize(); // start with zero or 1 seg
|
||||
|
||||
int start = segmentInfos.size();
|
||||
|
||||
for (int i = 0; i < dirs.length; i++) {
|
||||
SegmentInfos sis = new SegmentInfos(); // read infos from dir
|
||||
sis.read(dirs[i]);
|
||||
|
@ -554,6 +557,16 @@ public class IndexWriter {
|
|||
segmentInfos.addElement(sis.info(j)); // add each info
|
||||
}
|
||||
}
|
||||
|
||||
// merge newly added segments in log(n) passes
|
||||
while (segmentInfos.size() > start+mergeFactor) {
|
||||
for (int base = start+1; base < segmentInfos.size(); base++) {
|
||||
int end = Math.min(segmentInfos.size(), base+mergeFactor);
|
||||
if (end-base > 1)
|
||||
mergeSegments(base, end);
|
||||
}
|
||||
}
|
||||
|
||||
optimize(); // final cleanup
|
||||
}
|
||||
|
||||
|
@ -659,12 +672,19 @@ public class IndexWriter {
|
|||
and pushes the merged index onto the top of the segmentInfos stack. */
|
||||
private final void mergeSegments(int minSegment)
|
||||
throws IOException {
|
||||
mergeSegments(minSegment, segmentInfos.size());
|
||||
}
|
||||
|
||||
/** Merges the named range of segments, replacing them in the stack with a
|
||||
* single segment. */
|
||||
private final void mergeSegments(int minSegment, int end)
|
||||
throws IOException {
|
||||
final String mergedName = newSegmentName();
|
||||
if (infoStream != null) infoStream.print("merging segments");
|
||||
SegmentMerger merger = new SegmentMerger(this, mergedName);
|
||||
|
||||
final Vector segmentsToDelete = new Vector();
|
||||
for (int i = minSegment; i < segmentInfos.size(); i++) {
|
||||
for (int i = minSegment; i < end; i++) {
|
||||
SegmentInfo si = segmentInfos.info(i);
|
||||
if (infoStream != null)
|
||||
infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
|
||||
|
@ -681,7 +701,8 @@ public class IndexWriter {
|
|||
infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
|
||||
}
|
||||
|
||||
segmentInfos.setSize(minSegment); // pop old infos & add new
|
||||
for (int i = end-1; i >= minSegment; i--) // remove old infos & add new
|
||||
segmentInfos.remove(i);
|
||||
segmentInfos.addElement(new SegmentInfo(mergedName, mergedDocCount,
|
||||
directory));
|
||||
|
||||
|
|
Loading…
Reference in New Issue