new IndexWriter.addIndexesNoOptimize(): LUCENE-528

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@468177 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2006-10-26 22:47:15 +00:00
parent 19bf841c27
commit 4dfdd6a0af
3 changed files with 156 additions and 8 deletions

View File

@ -39,6 +39,10 @@ New features
5. LUCENE-544: Added the ability to specify different boosts for different 5. LUCENE-544: Added the ability to specify different boosts for different
fields when using MultiFieldQueryParser (Matt Ericson via Otis Gospodnetic) fields when using MultiFieldQueryParser (Matt Ericson via Otis Gospodnetic)
6. LUCENE-528: New IndexWriter.addIndexesNoOptimize() that doesn't optimize the
index when adding new segments, only performing merges as needed.
(Ning Li via Yonik Seeley)
API Changes API Changes
1. LUCENE-438: Remove "final" from Token, implement Cloneable, allow 1. LUCENE-438: Remove "final" from Token, implement Cloneable, allow

View File

@ -632,6 +632,130 @@ public class IndexWriter {
optimize(); // final cleanup optimize(); // final cleanup
} }
/**
 * Merges all segments from an array of indexes into this index.
 * <p>
 * This is similar to addIndexes(Directory[]). However, no optimize()
 * is called either at the beginning or at the end. Instead, merges
 * are carried out as necessary.
 * <p>
 * This requires that this index not be among those to be added, and that
 * the level upper bound of every added segment (the smallest value of
 * minMergeDocs * mergeFactor^k that is not below the segment's doc count)
 * not exceed maxMergeDocs.
 * <p>
 * If a requirement is violated, or the segment infos of a directory cannot
 * be read, the segment infos appended so far are removed again before the
 * exception propagates.
 *
 * @param dirs the directories whose segments are added to this index
 * @throws IllegalArgumentException if one of dirs is this writer's own
 *         directory, or if the level upper bound of an added segment
 *         exceeds maxMergeDocs
 * @throws IOException if an I/O error occurs
 */
public synchronized void addIndexesNoOptimize(Directory[] dirs)
    throws IOException {
  // Adding indexes can be viewed as adding a sequence of segments S to
  // a sequence of segments T. Segments in T follow the invariants but
  // segments in S may not since they could come from multiple indexes.
  // Here is the merge algorithm for addIndexesNoOptimize():
  //
  // 1 Flush ram segments.
  // 2 Consider a combined sequence with segments from T followed
  //   by segments from S (same as current addIndexes(Directory[])).
  // 3 Assume the highest level for segments in S is h. Call
  //   maybeMergeSegments(), but instead of starting w/ lowerBound = -1
  //   and upperBound = maxBufferedDocs, start w/ lowerBound = -1 and
  //   upperBound = upperBound of level h. After this, the invariants
  //   are guaranteed except for the last < M segments whose levels <= h.
  // 4 If the invariants hold for the last < M segments whose levels <= h,
  //   if some of those < M segments are from S (not merged in step 3),
  //   properly copy them over*, otherwise done.
  //   Otherwise, simply merge those segments. If the merge results in
  //   a segment of level <= h, done. Otherwise, it's of level h+1 and call
  //   maybeMergeSegments() starting w/ upperBound = upperBound of level h+1.
  //
  // * Ideally, we want to simply copy a segment. However, directory does
  //   not support copy yet. In addition, source may use compound file or not
  //   and target may use compound file or not. So we use mergeSegments() to
  //   copy a segment, which may cause doc count to change because deleted
  //   docs are garbage collected.
  //
  // In current addIndexes(Directory[]), segment infos in S are added to
  // T's "segmentInfos" upfront. Then segments in S are merged to T several
  // at a time. Every merge is committed with T's "segmentInfos". So if
  // a reader is opened on T while addIndexes() is going on, it could see
  // an inconsistent index. AddIndexesNoOptimize() has a similar behaviour.

  // 1 flush ram segments
  flushRamSegments();

  // 2 copy segment infos and find the highest level from dirs
  int start = segmentInfos.size();
  // Tracked as a long so that multiplying by mergeFactor cannot wrap around
  // before the comparison against maxMergeDocs gets to reject the value.
  long levelUpperBound = minMergeDocs;

  boolean success = false;
  try {
    for (int i = 0; i < dirs.length; i++) {
      if (directory == dirs[i]) {
        // cannot add this index: segments may be deleted in merge before added
        throw new IllegalArgumentException("Cannot add this index to itself");
      }

      SegmentInfos sis = new SegmentInfos(); // read infos from dir
      sis.read(dirs[i]);
      for (int j = 0; j < sis.size(); j++) {
        SegmentInfo info = sis.info(j);
        segmentInfos.addElement(info); // add each info

        while (levelUpperBound < info.docCount) {
          levelUpperBound *= mergeFactor; // find the highest level from dirs
          if (levelUpperBound > maxMergeDocs) {
            // upper bound cannot exceed maxMergeDocs
            throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs");
          }
        }
      }
    }
    success = true;
  } finally {
    if (!success) {
      // Roll back on any failure (argument check or failed read) so that no
      // partially added foreign segment infos stay behind in segmentInfos.
      for (int i = segmentInfos.size() - 1; i >= start; i--) {
        segmentInfos.remove(i);
      }
    }
  }

  // Either still minMergeDocs or already checked against maxMergeDocs, so
  // the value fits back into an int (assuming maxMergeDocs is an int field,
  // as its use with int bounds here suggests).
  int startUpperBound = (int) levelUpperBound;

  // 3 maybe merge segments starting from the highest level from dirs
  maybeMergeSegments(startUpperBound);

  // get the tail segments whose levels <= h
  int segmentCount = segmentInfos.size();
  int numTailSegments = 0;
  while (numTailSegments < segmentCount
      && startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) {
    numTailSegments++;
  }
  if (numTailSegments == 0) {
    return;
  }

  // 4 make sure invariants hold for the tail segments whose levels <= h
  if (checkNonDecreasingLevels(segmentCount - numTailSegments)) {
    // identify the segments from S to be copied (not merged in 3)
    int numSegmentsToCopy = 0;
    while (numSegmentsToCopy < segmentCount
        && directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) {
      numSegmentsToCopy++;
    }
    if (numSegmentsToCopy == 0) {
      return;
    }

    // copy those segments from S; each one is "merged" on its own, which
    // rewrites it in place of the foreign segment info
    for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) {
      mergeSegments(segmentInfos, i, i + 1);
    }
    // doc counts may have changed during the copy, so check again
    if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) {
      return;
    }
  }

  // invariants do not hold, simply merge those segments
  mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount);

  // maybe merge segments again if necessary
  if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) {
    // Upper bound of level h+1; saturate at Integer.MAX_VALUE instead of
    // letting the int product wrap around to a small or negative bound.
    long nextUpperBound = Math.min((long) startUpperBound * mergeFactor, Integer.MAX_VALUE);
    maybeMergeSegments((int) nextUpperBound);
  }
}
/** Merges the provided indexes into this index. /** Merges the provided indexes into this index.
* <p>After this completes, the index is optimized. </p> * <p>After this completes, the index is optimized. </p>
* <p>The provided IndexReaders are not closed.</p> * <p>The provided IndexReaders are not closed.</p>
@ -735,16 +859,16 @@ public class IndexWriter {
private final void flushRamSegments() throws IOException { private final void flushRamSegments() throws IOException {
if (ramSegmentInfos.size() > 0) { if (ramSegmentInfos.size() > 0) {
mergeSegments(ramSegmentInfos, 0, ramSegmentInfos.size()); mergeSegments(ramSegmentInfos, 0, ramSegmentInfos.size());
maybeMergeSegments(); maybeMergeSegments(minMergeDocs);
} }
} }
/** Incremental segment merger. */ /** Incremental segment merger. */
private final void maybeMergeSegments() throws IOException { private final void maybeMergeSegments(int startUpperBound) throws IOException {
long lowerBound = -1; long lowerBound = -1;
long upperBound = minMergeDocs; long upperBound = startUpperBound;
while (upperBound * mergeFactor <= maxMergeDocs) { while (upperBound < maxMergeDocs) {
int minSegment = segmentInfos.size(); int minSegment = segmentInfos.size();
int maxSegment = -1; int maxSegment = -1;
@ -949,4 +1073,22 @@ public class IndexWriter {
} }
directory.renameFile("deleteable.new", IndexFileNames.DELETABLE); directory.renameFile("deleteable.new", IndexFileNames.DELETABLE);
} }
/**
 * Checks the segments from the last one back to index <code>start</code>
 * and reports whether their levels never decrease in that direction.
 * <p>
 * Levels are doc-count ranges: the first one ends at minMergeDocs and each
 * following one ends at mergeFactor times the previous end. Walking from
 * the end of segmentInfos towards <code>start</code>, a segment whose doc
 * count is not above the lower bound of the highest level reached so far
 * breaks the ordering.
 *
 * @param start index of the first (leftmost) segment to include in the check
 * @return true if no checked segment falls below the level reached by the
 *         segments after it, false otherwise
 */
private final boolean checkNonDecreasingLevels(int start) {
  // long bounds, as in maybeMergeSegments(): repeated multiplication by
  // mergeFactor must not wrap around, otherwise the level search below
  // could compare against a garbage bound or never terminate.
  long lowerBound = -1;
  long upperBound = minMergeDocs;

  for (int i = segmentInfos.size() - 1; i >= start; i--) {
    int docCount = segmentInfos.info(i).docCount;
    if (docCount <= lowerBound) {
      // this segment sits on a lower level than a segment after it
      return false;
    }

    // advance to the level that contains docCount
    while (docCount > upperBound) {
      lowerBound = upperBound;
      upperBound *= mergeFactor;
    }
  }
  return true;
}
} }

View File

@ -181,14 +181,14 @@ public class TestIndexWriterMergePolicy extends TestCase {
int ramSegmentCount = writer.getRAMSegmentCount(); int ramSegmentCount = writer.getRAMSegmentCount();
assertTrue(ramSegmentCount < maxBufferedDocs); assertTrue(ramSegmentCount < maxBufferedDocs);
int lowerBound = 0; int lowerBound = -1;
int upperBound = maxBufferedDocs; int upperBound = maxBufferedDocs;
int numSegments = 0; int numSegments = 0;
int segmentCount = writer.getSegmentCount(); int segmentCount = writer.getSegmentCount();
for (int i = segmentCount - 1; i >= 0; i--) { for (int i = segmentCount - 1; i >= 0; i--) {
int docCount = writer.getDocCount(i); int docCount = writer.getDocCount(i);
assertTrue(docCount > lowerBound || docCount == 0); assertTrue(docCount > lowerBound);
if (docCount <= upperBound) { if (docCount <= upperBound) {
numSegments++; numSegments++;
@ -197,8 +197,10 @@ public class TestIndexWriterMergePolicy extends TestCase {
assertTrue(numSegments < mergeFactor); assertTrue(numSegments < mergeFactor);
} }
do {
lowerBound = upperBound; lowerBound = upperBound;
upperBound *= mergeFactor; upperBound *= mergeFactor;
} while (docCount > upperBound);
numSegments = 1; numSegments = 1;
} }
} }