mirror of https://github.com/apache/lucene.git
Optimize IndexWriter.addIndexes(Directory[]).
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@179611 13f79535-47bb-0310-9956-ffa450edef68
parent eea1c63a62
commit 098a0e95b8
@@ -113,7 +113,6 @@ New features
     fields in arbitrarily formats can be cached as ints and floats.
     (Doug Cutting)
-
 
 API Changes
 
  1. Several methods and fields have been deprecated. The API documentation
@@ -169,7 +168,6 @@ Bug fixes
     corrupted when the old version of a file was longer than the new.
     Now any existing file is first removed. (Doug Cutting)
-
 
 Optimizations
 
  1. Disk usage (peak requirements during indexing and optimization)
@@ -210,6 +208,11 @@ Optimizations
     term index is now read into memory lazily at the first
     random-access. (Doug Cutting)
 
+ 9. Optimize IndexWriter.addIndexes(Directory[]) when the number of
+    added indexes is larger than mergeFactor. Previously this could
+    result in quadratic performance. Now performance is n log(n).
+    (Doug Cutting)
+
 Infrastructure
 
  1. Lucene's source code repository has converted from CVS to
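Note (illustration only, not part of the commit): one way to see where the n log(n) claim comes from is to count documents copied during merging. The sketch below uses a toy cost model in which each added index contributes one unit-sized segment and a merge costs the total size of its inputs; the class name and constants are hypothetical, not Lucene code. The first loop mimics the old behaviour, where optimize() repeatedly folds the top mergeFactor segments into one growing segment; the second mimics the new behaviour of merging in passes of at most mergeFactor segments.

import java.util.ArrayList;
import java.util.List;

// Hypothetical cost model for the merge work done by addIndexes(Directory[]).
public class MergeCostSketch {
  public static void main(String[] args) {
    int n = 1000;          // number of added single-segment indexes (made up)
    int mergeFactor = 10;  // Lucene's default mergeFactor

    // Old behaviour, roughly: keep merging the top mergeFactor segments
    // into one, so the growing result is re-copied on every pass.
    List<Long> old = unitSegments(n);
    long oldCost = 0;
    while (old.size() > 1) {
      int min = Math.max(0, old.size() - mergeFactor);
      long merged = 0;
      for (int i = old.size() - 1; i >= min; i--)
        merged += old.remove(i);
      oldCost += merged;                      // docs copied by this merge
      old.add(merged);
    }

    // New behaviour, roughly: merge in passes of at most mergeFactor
    // segments, so each document is copied about log_mergeFactor(n) times.
    List<Long> fresh = unitSegments(n);
    long newCost = 0;
    while (fresh.size() > 1) {
      List<Long> next = new ArrayList<Long>();
      for (int base = 0; base < fresh.size(); base += mergeFactor) {
        int end = Math.min(fresh.size(), base + mergeFactor);
        long merged = 0;
        for (int i = base; i < end; i++)
          merged += fresh.get(i);
        newCost += merged;
        next.add(merged);
      }
      fresh = next;
    }

    System.out.println("one-big-merge style cost ~ " + oldCost);  // roughly n^2 / (2*mergeFactor)
    System.out.println("log(n)-pass style cost   ~ " + newCost);  // roughly n * log_mergeFactor(n)
  }

  private static List<Long> unitSegments(int n) {
    List<Long> sizes = new ArrayList<Long>();
    for (int i = 0; i < n; i++) sizes.add(1L);
    return sizes;
  }
}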
@@ -547,6 +547,9 @@ public class IndexWriter {
   public synchronized void addIndexes(Directory[] dirs)
       throws IOException {
     optimize();                               // start with zero or 1 seg
+
+    int start = segmentInfos.size();
+
     for (int i = 0; i < dirs.length; i++) {
       SegmentInfos sis = new SegmentInfos();  // read infos from dir
       sis.read(dirs[i]);
@@ -554,6 +557,16 @@ public class IndexWriter {
         segmentInfos.addElement(sis.info(j)); // add each info
       }
     }
+
+    // merge newly added segments in log(n) passes
+    while (segmentInfos.size() > start+mergeFactor) {
+      for (int base = start+1; base < segmentInfos.size(); base++) {
+        int end = Math.min(segmentInfos.size(), base+mergeFactor);
+        if (end-base > 1)
+          mergeSegments(base, end);
+      }
+    }
+
     optimize();                               // final cleanup
   }
 
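Illustration only, not part of the diff: a minimal caller that folds several existing on-disk indexes into one, using the IndexWriter API of this era (IndexWriter(String, Analyzer, boolean), FSDirectory.getDirectory(String, boolean), addIndexes(Directory[])). The destination path, the class name, and the assumption that each command-line argument names an existing index are hypothetical.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

// Hypothetical command-line tool: java MergeIndexes <srcIndex1> <srcIndex2> ...
public class MergeIndexes {
  public static void main(String[] args) throws Exception {
    // Destination index; 'true' creates (or overwrites) it.
    IndexWriter writer =
      new IndexWriter("/tmp/merged-index", new StandardAnalyzer(), true);

    // Open each source index; 'false' means open an existing index.
    Directory[] dirs = new Directory[args.length];
    for (int i = 0; i < args.length; i++)
      dirs[i] = FSDirectory.getDirectory(args[i], false);

    // Optimizes, copies every source segment in, merges the new segments
    // in log(n) passes, then optimizes again (see the hunks above and below).
    writer.addIndexes(dirs);
    writer.close();
  }
}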
@@ -659,12 +672,19 @@ public class IndexWriter {
     and pushes the merged index onto the top of the segmentInfos stack. */
   private final void mergeSegments(int minSegment)
       throws IOException {
+    mergeSegments(minSegment, segmentInfos.size());
+  }
+
+  /** Merges the named range of segments, replacing them in the stack with a
+   * single segment. */
+  private final void mergeSegments(int minSegment, int end)
+      throws IOException {
     final String mergedName = newSegmentName();
     if (infoStream != null) infoStream.print("merging segments");
     SegmentMerger merger = new SegmentMerger(this, mergedName);
 
     final Vector segmentsToDelete = new Vector();
-    for (int i = minSegment; i < segmentInfos.size(); i++) {
+    for (int i = minSegment; i < end; i++) {
       SegmentInfo si = segmentInfos.info(i);
       if (infoStream != null)
         infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
@@ -681,7 +701,8 @@ public class IndexWriter {
       infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
     }
 
-    segmentInfos.setSize(minSegment);         // pop old infos & add new
+    for (int i = end-1; i >= minSegment; i--) // remove old infos & add new
+      segmentInfos.remove(i);
     segmentInfos.addElement(new SegmentInfo(mergedName, mergedDocCount,
                                             directory));
 
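Side note on the last hunk (illustration only): setSize(minSegment) truncated every segment above minSegment, which was fine while the merge always ran to the top of the stack; a ranged merge must drop only [minSegment, end) and keep anything above end, hence the reverse remove() loop. A tiny, hypothetical stand-alone demo of that Vector manipulation:

import java.util.Vector;

// Hypothetical demo of the range replacement done at the end of
// mergeSegments(int, int): drop [minSegment, end) and append the result.
public class RangeReplaceSketch {
  public static void main(String[] args) {
    Vector segs = new Vector();
    for (int i = 0; i < 6; i++)
      segs.addElement("_" + i);               // [_0, _1, _2, _3, _4, _5]

    int minSegment = 1, end = 4;              // pretend we merged _1, _2, _3
    for (int i = end - 1; i >= minSegment; i--)
      segs.remove(i);                         // reverse order keeps indexes valid
    segs.addElement("_merged");               // merged segment goes on top

    // Prints [_0, _4, _5, _merged]; setSize(minSegment) would have lost _4 and _5.
    System.out.println(segs);
  }
}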