mirror of https://github.com/apache/lucene.git
new IndexWriter.addIndexesNoOptimize(): LUCENE-528
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@468177 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
19bf841c27
commit
4dfdd6a0af
|
@ -39,6 +39,10 @@ New features
|
||||||
5. LUCENE-544: Added the ability to specify different boosts for different
|
5. LUCENE-544: Added the ability to specify different boosts for different
|
||||||
fields when using MultiFieldQueryParser (Matt Ericson via Otis Gospodnetic)
|
fields when using MultiFieldQueryParser (Matt Ericson via Otis Gospodnetic)
|
||||||
|
|
||||||
|
6. LUCENE-528: New IndexWriter.addIndexesNoOptimize() that doesn't optimize the
|
||||||
|
index when adding new segments, only performing merges as needed.
|
||||||
|
(Ning Li via Yonik Seeley)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
1. LUCENE-438: Remove "final" from Token, implement Cloneable, allow
|
1. LUCENE-438: Remove "final" from Token, implement Cloneable, allow
|
||||||
|
|
|
@ -632,6 +632,130 @@ public class IndexWriter {
|
||||||
optimize(); // final cleanup
|
optimize(); // final cleanup
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges all segments from an array of indexes into this index.
|
||||||
|
* <p>
|
||||||
|
* This is similar to addIndexes(Directory[]). However, no optimize()
|
||||||
|
* is called either at the beginning or at the end. Instead, merges
|
||||||
|
* are carried out as necessary.
|
||||||
|
* <p>
|
||||||
|
* This requires this index not be among those to be added, and the
|
||||||
|
* upper bound of those segment doc counts not exceed maxMergeDocs.
|
||||||
|
*/
|
||||||
|
public synchronized void addIndexesNoOptimize(Directory[] dirs)
|
||||||
|
throws IOException {
|
||||||
|
// Adding indexes can be viewed as adding a sequence of segments S to
|
||||||
|
// a sequence of segments T. Segments in T follow the invariants but
|
||||||
|
// segments in S may not since they could come from multiple indexes.
|
||||||
|
// Here is the merge algorithm for addIndexesNoOptimize():
|
||||||
|
//
|
||||||
|
// 1 Flush ram segments.
|
||||||
|
// 2 Consider a combined sequence with segments from T followed
|
||||||
|
// by segments from S (same as current addIndexes(Directory[])).
|
||||||
|
// 3 Assume the highest level for segments in S is h. Call
|
||||||
|
// maybeMergeSegments(), but instead of starting w/ lowerBound = -1
|
||||||
|
// and upperBound = maxBufferedDocs, start w/ lowerBound = -1 and
|
||||||
|
// upperBound = upperBound of level h. After this, the invariants
|
||||||
|
// are guaranteed except for the last < M segments whose levels <= h.
|
||||||
|
// 4 If the invariants hold for the last < M segments whose levels <= h,
|
||||||
|
// if some of those < M segments are from S (not merged in step 3),
|
||||||
|
// properly copy them over*, otherwise done.
|
||||||
|
// Otherwise, simply merge those segments. If the merge results in
|
||||||
|
// a segment of level <= h, done. Otherwise, it's of level h+1 and call
|
||||||
|
// maybeMergeSegments() starting w/ upperBound = upperBound of level h+1.
|
||||||
|
//
|
||||||
|
// * Ideally, we want to simply copy a segment. However, directory does
|
||||||
|
// not support copy yet. In addition, source may use compound file or not
|
||||||
|
// and target may use compound file or not. So we use mergeSegments() to
|
||||||
|
// copy a segment, which may cause doc count to change because deleted
|
||||||
|
// docs are garbage collected.
|
||||||
|
//
|
||||||
|
// In current addIndexes(Directory[]), segment infos in S are added to
|
||||||
|
// T's "segmentInfos" upfront. Then segments in S are merged to T several
|
||||||
|
// at a time. Every merge is committed with T's "segmentInfos". So if
|
||||||
|
// a reader is opened on T while addIndexes() is going on, it could see
|
||||||
|
// an inconsistent index. addIndexesNoOptimize() has a similar behaviour.
|
||||||
|
|
||||||
|
// 1 flush ram segments
|
||||||
|
flushRamSegments();
|
||||||
|
|
||||||
|
// 2 copy segment infos and find the highest level from dirs
|
||||||
|
int start = segmentInfos.size();
|
||||||
|
int startUpperBound = minMergeDocs;
|
||||||
|
|
||||||
|
try {
|
||||||
|
for (int i = 0; i < dirs.length; i++) {
|
||||||
|
if (directory == dirs[i]) {
|
||||||
|
// cannot add this index: segments may be deleted in merge before being added
|
||||||
|
throw new IllegalArgumentException("Cannot add this index to itself");
|
||||||
|
}
|
||||||
|
|
||||||
|
SegmentInfos sis = new SegmentInfos(); // read infos from dir
|
||||||
|
sis.read(dirs[i]);
|
||||||
|
for (int j = 0; j < sis.size(); j++) {
|
||||||
|
SegmentInfo info = sis.info(j);
|
||||||
|
segmentInfos.addElement(info); // add each info
|
||||||
|
|
||||||
|
while (startUpperBound < info.docCount) {
|
||||||
|
startUpperBound *= mergeFactor; // find the highest level from dirs
|
||||||
|
if (startUpperBound > maxMergeDocs) {
|
||||||
|
// upper bound cannot exceed maxMergeDocs
|
||||||
|
throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
for (int i = segmentInfos.size() - 1; i >= start; i--) {
|
||||||
|
segmentInfos.remove(i);
|
||||||
|
}
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3 maybe merge segments starting from the highest level from dirs
|
||||||
|
maybeMergeSegments(startUpperBound);
|
||||||
|
|
||||||
|
// get the tail segments whose levels <= h
|
||||||
|
int segmentCount = segmentInfos.size();
|
||||||
|
int numTailSegments = 0;
|
||||||
|
while (numTailSegments < segmentCount
|
||||||
|
&& startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) {
|
||||||
|
numTailSegments++;
|
||||||
|
}
|
||||||
|
if (numTailSegments == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4 make sure invariants hold for the tail segments whose levels <= h
|
||||||
|
if (checkNonDecreasingLevels(segmentCount - numTailSegments)) {
|
||||||
|
// identify the segments from S to be copied (not merged in 3)
|
||||||
|
int numSegmentsToCopy = 0;
|
||||||
|
while (numSegmentsToCopy < segmentCount
|
||||||
|
&& directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) {
|
||||||
|
numSegmentsToCopy++;
|
||||||
|
}
|
||||||
|
if (numSegmentsToCopy == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// copy those segments from S
|
||||||
|
for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) {
|
||||||
|
mergeSegments(segmentInfos, i, i + 1);
|
||||||
|
}
|
||||||
|
if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// invariants do not hold, simply merge those segments
|
||||||
|
mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount);
|
||||||
|
|
||||||
|
// maybe merge segments again if necessary
|
||||||
|
if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) {
|
||||||
|
maybeMergeSegments(startUpperBound * mergeFactor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Merges the provided indexes into this index.
|
/** Merges the provided indexes into this index.
|
||||||
* <p>After this completes, the index is optimized. </p>
|
* <p>After this completes, the index is optimized. </p>
|
||||||
* <p>The provided IndexReaders are not closed.</p>
|
* <p>The provided IndexReaders are not closed.</p>
|
||||||
|
@ -735,16 +859,16 @@ public class IndexWriter {
|
||||||
private final void flushRamSegments() throws IOException {
|
private final void flushRamSegments() throws IOException {
|
||||||
if (ramSegmentInfos.size() > 0) {
|
if (ramSegmentInfos.size() > 0) {
|
||||||
mergeSegments(ramSegmentInfos, 0, ramSegmentInfos.size());
|
mergeSegments(ramSegmentInfos, 0, ramSegmentInfos.size());
|
||||||
maybeMergeSegments();
|
maybeMergeSegments(minMergeDocs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Incremental segment merger. */
|
/** Incremental segment merger. */
|
||||||
private final void maybeMergeSegments() throws IOException {
|
private final void maybeMergeSegments(int startUpperBound) throws IOException {
|
||||||
long lowerBound = -1;
|
long lowerBound = -1;
|
||||||
long upperBound = minMergeDocs;
|
long upperBound = startUpperBound;
|
||||||
|
|
||||||
while (upperBound * mergeFactor <= maxMergeDocs) {
|
while (upperBound < maxMergeDocs) {
|
||||||
int minSegment = segmentInfos.size();
|
int minSegment = segmentInfos.size();
|
||||||
int maxSegment = -1;
|
int maxSegment = -1;
|
||||||
|
|
||||||
|
@ -949,4 +1073,22 @@ public class IndexWriter {
|
||||||
}
|
}
|
||||||
directory.renameFile("deleteable.new", IndexFileNames.DELETABLE);
|
directory.renameFile("deleteable.new", IndexFileNames.DELETABLE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private final boolean checkNonDecreasingLevels(int start) {
|
||||||
|
int lowerBound = -1;
|
||||||
|
int upperBound = minMergeDocs;
|
||||||
|
|
||||||
|
for (int i = segmentInfos.size() - 1; i >= start; i--) {
|
||||||
|
int docCount = segmentInfos.info(i).docCount;
|
||||||
|
if (docCount <= lowerBound) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (docCount > upperBound) {
|
||||||
|
lowerBound = upperBound;
|
||||||
|
upperBound *= mergeFactor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -181,14 +181,14 @@ public class TestIndexWriterMergePolicy extends TestCase {
|
||||||
int ramSegmentCount = writer.getRAMSegmentCount();
|
int ramSegmentCount = writer.getRAMSegmentCount();
|
||||||
assertTrue(ramSegmentCount < maxBufferedDocs);
|
assertTrue(ramSegmentCount < maxBufferedDocs);
|
||||||
|
|
||||||
int lowerBound = 0;
|
int lowerBound = -1;
|
||||||
int upperBound = maxBufferedDocs;
|
int upperBound = maxBufferedDocs;
|
||||||
int numSegments = 0;
|
int numSegments = 0;
|
||||||
|
|
||||||
int segmentCount = writer.getSegmentCount();
|
int segmentCount = writer.getSegmentCount();
|
||||||
for (int i = segmentCount - 1; i >= 0; i--) {
|
for (int i = segmentCount - 1; i >= 0; i--) {
|
||||||
int docCount = writer.getDocCount(i);
|
int docCount = writer.getDocCount(i);
|
||||||
assertTrue(docCount > lowerBound || docCount == 0);
|
assertTrue(docCount > lowerBound);
|
||||||
|
|
||||||
if (docCount <= upperBound) {
|
if (docCount <= upperBound) {
|
||||||
numSegments++;
|
numSegments++;
|
||||||
|
@ -197,8 +197,10 @@ public class TestIndexWriterMergePolicy extends TestCase {
|
||||||
assertTrue(numSegments < mergeFactor);
|
assertTrue(numSegments < mergeFactor);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
do {
|
||||||
lowerBound = upperBound;
|
lowerBound = upperBound;
|
||||||
upperBound *= mergeFactor;
|
upperBound *= mergeFactor;
|
||||||
|
} while (docCount > upperBound);
|
||||||
numSegments = 1;
|
numSegments = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue