mirror of https://github.com/apache/lucene.git
LUCENE-2701: merge 3x to trunk
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1025577 13f79535-47bb-0310-9956-ffa450edef68
parent 8fe2254ed7
commit f515772f69
@@ -397,6 +397,10 @@ Changes in runtime behavior
   test lock just before the real lock is acquired.  (Surinder Pal
   Singh Bindra via Mike McCandless)
 
+* LUCENE-2701: maxMergeMB and maxMergeDocs constraints set on LogMergePolicy now
+  affect optimize() as well (as opposed to only regular merges). This means that
+  you can run optimize() and segments that are too large won't be merged. (Shai Erera)
+
 API Changes
 
 * LUCENE-2076: Rename FSDirectory.getFile -> getDirectory.  (George
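In practice, the LUCENE-2701 entry above means a size cap now survives optimize(). A minimal, hypothetical sketch against the 3.x API exercised by this commit's tests (the 64 MB threshold is illustrative; the null analyzer mirrors the tests):

    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.LogByteSizeMergePolicy;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class SizeBoundedOptimizeExample {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        // null analyzer, as in TestSizeBoundedOptimize below
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT, null);
        LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
        mp.setMaxMergeMB(64.0); // segments over ~64 MB are never merged...
        conf.setMergePolicy(mp);
        IndexWriter writer = new IndexWriter(dir, conf);
        // ... add documents ...
        writer.optimize(); // ...now including during optimize()
        writer.close();
      }
    }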
@@ -103,31 +103,6 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
     }
   }
 
-  private boolean isOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
-    final int numSegments = infos.size();
-    int numToOptimize = 0;
-    SegmentInfo optimizeInfo = null;
-    for(int i=0;i<numSegments && numToOptimize <= maxNumSegments;i++) {
-      final SegmentInfo info = infos.info(i);
-      if (segmentsToOptimize.contains(info)) {
-        numToOptimize++;
-        optimizeInfo = info;
-      }
-    }
-
-    return numToOptimize <= maxNumSegments &&
-      (numToOptimize != 1 || isOptimized(writer, optimizeInfo));
-  }
-
-  private boolean isOptimized(IndexWriter writer, SegmentInfo info)
-    throws IOException {
-    assert writer != null;
-    return !info.hasDeletions() &&
-      !info.hasSeparateNorms() &&
-      info.dir == writer.getDirectory() &&
-      info.getUseCompoundFile() == getUseCompoundFile();
-  }
-
   @Override
   public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
 
@@ -135,7 +110,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
 
     MergeSpecification spec = null;
 
-    if (!isOptimized(infos, writer.get(), maxNumSegments, segmentsToOptimize)) {
+    if (!isOptimized(infos, maxNumSegments, segmentsToOptimize)) {
 
       // Find the newest (rightmost) segment that needs to
       // be optimized (other segments may have been flushed
@@ -158,7 +133,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
         // Since we must optimize down to 1 segment, the
         // choice is simple:
         boolean useCompoundFile = getUseCompoundFile();
-        if (last > 1 || !isOptimized(writer.get(), infos.info(0))) {
+        if (last > 1 || !isOptimized(infos.info(0))) {
 
           spec = new MergeSpecification();
           spec.add(new OneMerge(infos.range(0, last), useCompoundFile));
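The two private isOptimized() variants deleted above duplicated checks that already live in LogMergePolicy; the hunks below promote the superclass members from private to protected so subclasses can reuse them instead. A hypothetical subclass sketch (MyMergePolicy is not part of this commit; it only illustrates the new visibility):

    package org.apache.lucene.index;

    import java.io.IOException;
    import java.util.Set;

    public class MyMergePolicy extends LogByteSizeMergePolicy {
      @Override
      public MergeSpecification findMergesForOptimize(SegmentInfos infos,
          int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
        message("custom optimize");  // protected helper inherited from LogMergePolicy
        // protected check inherited from LogMergePolicy:
        if (isOptimized(infos, maxNumSegments, segmentsToOptimize)) {
          return null;
        }
        return super.findMergesForOptimize(infos, maxNumSegments, segmentsToOptimize);
      }
    }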
@@ -54,16 +54,16 @@ public abstract class LogMergePolicy extends MergePolicy {
    *  or larger will never be merged.  @see setMaxMergeDocs */
   public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
 
-  private int mergeFactor = DEFAULT_MERGE_FACTOR;
+  protected int mergeFactor = DEFAULT_MERGE_FACTOR;
 
-  long minMergeSize;
-  long maxMergeSize;
-  int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
+  protected long minMergeSize;
+  protected long maxMergeSize;
+  protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
 
   protected boolean calibrateSizeByDeletes = true;
 
-  private boolean useCompoundFile = true;
-  private boolean useCompoundDocStore = true;
+  protected boolean useCompoundFile = true;
+  protected boolean useCompoundDocStore = true;
 
   public LogMergePolicy() {
     super();
@@ -74,7 +74,7 @@ public abstract class LogMergePolicy extends MergePolicy {
     return w != null && w.verbose();
   }
 
-  private void message(String message) {
+  protected void message(String message) {
     if (verbose())
       writer.get().message("LMP: " + message);
   }
@@ -180,7 +180,7 @@ public abstract class LogMergePolicy extends MergePolicy {
     }
   }
 
-  private boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
+  protected boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
     final int numSegments = infos.size();
     int numToOptimize = 0;
     SegmentInfo optimizeInfo = null;
@@ -199,7 +199,7 @@ public abstract class LogMergePolicy extends MergePolicy {
   /** Returns true if this single info is optimized (has no
    *  pending norms or deletes, is in the same dir as the
    *  writer, and matches the current compound file setting */
-  private boolean isOptimized(SegmentInfo info)
+  protected boolean isOptimized(SegmentInfo info)
     throws IOException {
     IndexWriter w = writer.get();
     assert w != null;
@@ -210,6 +210,103 @@ public abstract class LogMergePolicy extends MergePolicy {
       info.getUseCompoundFile() == useCompoundFile;
   }
 
+  /**
+   * Returns the merges necessary to optimize the index, taking the max merge
+   * size or max merge docs into consideration. This method attempts to respect
+   * the {@code maxNumSegments} parameter; however, due to the size
+   * constraints, more than that number of segments may remain in the
+   * index. Also, this method does not guarantee that exactly {@code
+   * maxNumSegments} will remain, but <= that number.
+   */
+  private MergeSpecification findMergesForOptimizeSizeLimit(
+      SegmentInfos infos, int maxNumSegments, int last) throws IOException {
+    MergeSpecification spec = new MergeSpecification();
+
+    int start = last - 1;
+    while (start >= 0) {
+      SegmentInfo info = infos.info(start);
+      if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
+        // need to skip that segment + add a merge for the 'right' segments,
+        // unless there is only 1 which is optimized.
+        if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
+          // there is more than 1 segment to the right of this one, or an unoptimized single segment.
+          spec.add(new OneMerge(infos.range(start + 1, last), useCompoundFile));
+        }
+        last = start;
+      } else if (last - start == mergeFactor) {
+        // mergeFactor eligible segments were found, add them as a merge.
+        spec.add(new OneMerge(infos.range(start, last), useCompoundFile));
+        last = start;
+      }
+      --start;
+    }
+
+    // Add any left-over segments, unless there is just 1 already optimized.
+    if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
+      spec.add(new OneMerge(infos.range(start, last), useCompoundFile));
+    }
+
+    return spec.merges.size() == 0 ? null : spec;
+  }
+
+  /**
+   * Returns the merges necessary to optimize the index. This method constrains
+   * the returned merges only by the {@code maxNumSegments} parameter, and
+   * guarantees that exactly that number of segments will remain in the index.
+   */
+  private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException {
+    MergeSpecification spec = new MergeSpecification();
+
+    // First, enroll all "full" merges (size
+    // mergeFactor) to potentially be run concurrently:
+    while (last - maxNumSegments + 1 >= mergeFactor) {
+      spec.add(new OneMerge(infos.range(last - mergeFactor, last), useCompoundFile));
+      last -= mergeFactor;
+    }
+
+    // Only if there are no full merges pending do we
+    // add a final partial (< mergeFactor segments) merge:
+    if (0 == spec.merges.size()) {
+      if (maxNumSegments == 1) {
+
+        // Since we must optimize down to 1 segment, the
+        // choice is simple:
+        if (last > 1 || !isOptimized(infos.info(0))) {
+          spec.add(new OneMerge(infos.range(0, last), useCompoundFile));
+        }
+      } else if (last > maxNumSegments) {
+
+        // Take care to pick a partial merge that is
+        // least cost, but does not make the index too
+        // lopsided.  If we always just picked the
+        // partial tail then we could produce a highly
+        // lopsided index over time:
+
+        // We must merge this many segments to leave
+        // maxNumSegments in the index (from when
+        // optimize was first kicked off):
+        final int finalMergeSize = last - maxNumSegments + 1;
+
+        // Consider all possible starting points:
+        long bestSize = 0;
+        int bestStart = 0;
+
+        for(int i=0;i<last-finalMergeSize+1;i++) {
+          long sumSize = 0;
+          for(int j=0;j<finalMergeSize;j++)
+            sumSize += size(infos.info(j+i));
+          if (i == 0 || (sumSize < 2*size(infos.info(i-1)) && sumSize < bestSize)) {
+            bestStart = i;
+            bestSize = sumSize;
+          }
+        }
+
+        spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize), useCompoundFile));
+      }
+    }
+    return spec.merges.size() == 0 ? null : spec;
+  }
+
   /** Returns the merges necessary to optimize the index.
    *  This merge policy defines "optimized" to mean only one
    *  segment in the index, where that segment has no
@@ -221,81 +318,45 @@ public abstract class LogMergePolicy extends MergePolicy {
   @Override
   public MergeSpecification findMergesForOptimize(SegmentInfos infos,
       int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
-    MergeSpecification spec;
 
     assert maxNumSegments > 0;
 
-    if (!isOptimized(infos, maxNumSegments, segmentsToOptimize)) {
-
-      // Find the newest (rightmost) segment that needs to
-      // be optimized (other segments may have been flushed
-      // since optimize started):
-      int last = infos.size();
-      while(last > 0) {
-        final SegmentInfo info = infos.info(--last);
-        if (segmentsToOptimize.contains(info)) {
-          last++;
-          break;
-        }
-      }
-
-      if (last > 0) {
-
-        spec = new MergeSpecification();
-
-        // First, enroll all "full" merges (size
-        // mergeFactor) to potentially be run concurrently:
-        while (last - maxNumSegments + 1 >= mergeFactor) {
-          spec.add(new OneMerge(infos.range(last-mergeFactor, last), useCompoundFile));
-          last -= mergeFactor;
-        }
-
-        // Only if there are no full merges pending do we
-        // add a final partial (< mergeFactor segments) merge:
-        if (0 == spec.merges.size()) {
-          if (maxNumSegments == 1) {
-
-            // Since we must optimize down to 1 segment, the
-            // choice is simple:
-            if (last > 1 || !isOptimized(infos.info(0)))
-              spec.add(new OneMerge(infos.range(0, last), useCompoundFile));
-          } else if (last > maxNumSegments) {
-
-            // Take care to pick a partial merge that is
-            // least cost, but does not make the index too
-            // lopsided.  If we always just picked the
-            // partial tail then we could produce a highly
-            // lopsided index over time:
-
-            // We must merge this many segments to leave
-            // maxNumSegments in the index (from when
-            // optimize was first kicked off):
-            final int finalMergeSize = last - maxNumSegments + 1;
-
-            // Consider all possible starting points:
-            long bestSize = 0;
-            int bestStart = 0;
-
-            for(int i=0;i<last-finalMergeSize+1;i++) {
-              long sumSize = 0;
-              for(int j=0;j<finalMergeSize;j++)
-                sumSize += size(infos.info(j+i));
-              if (i == 0 || (sumSize < 2*size(infos.info(i-1)) && sumSize < bestSize)) {
-                bestStart = i;
-                bestSize = sumSize;
-              }
-            }
-
-            spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize), useCompoundFile));
-          }
-        }
-      } else
-        spec = null;
-    } else
-      spec = null;
-
-    return spec;
+    // If the segments are already optimized (e.g. there's only 1 segment), or
+    // there are <maxNumSegments, all optimized, nothing to do.
+    if (isOptimized(infos, maxNumSegments, segmentsToOptimize)) return null;
+
+    // Find the newest (rightmost) segment that needs to
+    // be optimized (other segments may have been flushed
+    // since optimize started):
+    int last = infos.size();
+    while (last > 0) {
+      final SegmentInfo info = infos.info(--last);
+      if (segmentsToOptimize.contains(info)) {
+        last++;
+        break;
+      }
+    }
+
+    if (last == 0) return null;
+
+    // There is only one segment already, and it is optimized
+    if (maxNumSegments == 1 && last == 1 && isOptimized(infos.info(0))) return null;
+
+    // Check if there are any segments above the threshold
+    boolean anyTooLarge = false;
+    for (int i = 0; i < last; i++) {
+      SegmentInfo info = infos.info(i);
+      if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
+        anyTooLarge = true;
+        break;
+      }
+    }
+
+    if (anyTooLarge) {
+      return findMergesForOptimizeSizeLimit(infos, maxNumSegments, last);
+    } else {
+      return findMergesForOptimizeMaxNumSegments(infos, maxNumSegments, last);
+    }
   }
 
   /**
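Taken together, findMergesForOptimize now short-circuits the already-optimized cases and then dispatches: if any segment exceeds maxMergeSize/maxMergeDocs it runs the size-limited right-to-left scan, otherwise the old exact-count logic. A standalone toy walk-through of that scan (plain arrays, not the Lucene API; the isOptimized() refinements are elided and the sizes are made up):

    public class SizeLimitScanDemo {
      public static void main(String[] args) {
        // Shape mirrors testByteSizeLimit below: small segments around one oversized one.
        int[] sizes = {1, 1, 1, 30, 1, 1, 1, 1, 1};
        int maxMergeSize = 10; // made-up threshold
        int mergeFactor = 10;  // LogMergePolicy's default

        int last = sizes.length; // exclusive end of the current window
        int start = last - 1;
        while (start >= 0) {
          if (sizes[start] > maxMergeSize) {
            // Oversized segment: never merge it; merge whatever sits to its right.
            if (last - start - 1 > 1) {
              System.out.println("merge segments [" + (start + 1) + ", " + last + ")");
            }
            last = start;
          } else if (last - start == mergeFactor) {
            // A full window of mergeFactor eligible segments.
            System.out.println("merge segments [" + start + ", " + last + ")");
            last = start;
          }
          --start;
        }
        if (last > 0) { // left-over run at the front
          System.out.println("merge segments [0, " + last + ")");
        }
        // Prints [4, 9) and then [0, 3): three segments remain, the two merged
        // runs plus the untouched oversized segment, matching testByteSizeLimit.
      }
    }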
@@ -77,8 +77,8 @@ public abstract class MergePolicy implements java.io.Closeable {
     SegmentReader[] readers;        // used by IndexWriter
     SegmentReader[] readersClone;   // used by IndexWriter
     List<String> mergeFiles;        // used by IndexWriter
-    final SegmentInfos segments;
-    final boolean useCompoundFile;
+    public final SegmentInfos segments;
+    public final boolean useCompoundFile;
     boolean aborted;
     Throwable error;
     boolean paused;
@@ -146,7 +146,7 @@ public abstract class MergePolicy implements java.io.Closeable {
       return paused;
     }
 
-    String segString(Directory dir) {
+    public String segString(Directory dir) {
       StringBuilder b = new StringBuilder();
       final int numSegments = segments.size();
       for(int i=0;i<numSegments;i++) {
@@ -162,6 +162,30 @@ public abstract class MergePolicy implements java.io.Closeable {
       }
       return b.toString();
     }
+
+    /**
+     * Returns the total size in bytes of this merge. Note that this does not
+     * indicate the size of the merged segment, but the input total size.
+     */
+    public long totalBytesSize() throws IOException {
+      long total = 0;
+      for (SegmentInfo info : segments) {
+        total += info.sizeInBytes();
+      }
+      return total;
+    }
+
+    /**
+     * Returns the total number of documents that are included with this merge.
+     * Note that this does not indicate the number of documents after the merge.
+     */
+    public int totalNumDocs() throws IOException {
+      int total = 0;
+      for (SegmentInfo info : segments) {
+        total += info.docCount;
+      }
+      return total;
+    }
   }
 
   /**
@@ -176,7 +200,7 @@ public abstract class MergePolicy implements java.io.Closeable {
      * The subset of segments to be included in the primitive merge.
      */
 
-    public List<OneMerge> merges = new ArrayList<OneMerge>();
+    public final List<OneMerge> merges = new ArrayList<OneMerge>();
 
     public void add(OneMerge merge) {
       merges.add(merge);
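Because segments, useCompoundFile, segString(), and the two new totals are now public, code outside the package, such as a custom merge scheduler, can inspect a pending merge. A hypothetical logging helper (the name MergeLogger is illustrative, not part of this commit):

    import java.io.IOException;

    import org.apache.lucene.index.MergePolicy;
    import org.apache.lucene.store.Directory;

    public class MergeLogger {
      // Summarizes a pending merge using the members this commit makes public.
      public static void log(MergePolicy.OneMerge merge, Directory dir) throws IOException {
        System.out.println("merging " + merge.segString(dir)
            + ": " + merge.totalNumDocs() + " docs, "
            + merge.totalBytesSize() + " input bytes in "
            + merge.segments.size() + " segments"
            + (merge.useCompoundFile ? " (compound)" : ""));
      }
    }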
@@ -0,0 +1,372 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestSizeBoundedOptimize extends LuceneTestCase {
+
+  private void addDocs(IndexWriter writer, int numDocs) throws IOException {
+    for (int i = 0; i < numDocs; i++) {
+      Document doc = new Document();
+      writer.addDocument(doc);
+    }
+    writer.commit();
+  }
+
+  public void testByteSizeLimit() throws Exception {
+    // tests that the max merge size constraint is applied during optimize.
+    Directory dir = new RAMDirectory();
+
+    // Prepare an index w/ several small segments and a large one.
+    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null);
+    // prevent any merges from happening.
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+    final int numSegments = 15;
+    for (int i = 0; i < numSegments; i++) {
+      int numDocs = i == 7 ? 30 : 1;
+      addDocs(writer, numDocs);
+    }
+    writer.close();
+
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    double min = sis.info(0).sizeInBytes();
+
+    conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
+    lmp.setMaxMergeMB((min + 1) / (1 << 20));
+    conf.setMergePolicy(lmp);
+
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+
+    // Should only be 3 segments in the index, because one of them exceeds the size limit
+    sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(3, sis.size());
+  }
+
+  public void testNumDocsLimit() throws Exception {
+    // tests that the max merge docs constraint is applied during optimize.
+    Directory dir = new RAMDirectory();
+
+    // Prepare an index w/ several small segments and a large one.
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    // prevent any merges from happening.
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 5);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+
+    writer.close();
+
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    conf.setMergePolicy(lmp);
+
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+
+    // Should only be 3 segments in the index, because one of them exceeds the doc limit
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(3, sis.size());
+  }
+
+  public void testLastSegmentTooLarge() throws Exception {
+    Directory dir = new RAMDirectory();
+
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 5);
+
+    writer.close();
+
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    conf.setMergePolicy(lmp);
+
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(2, sis.size());
+  }
+
+  public void testFirstSegmentTooLarge() throws Exception {
+    Directory dir = new RAMDirectory();
+
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    addDocs(writer, 5);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+
+    writer.close();
+
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    conf.setMergePolicy(lmp);
+
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(2, sis.size());
+  }
+
+  public void testAllSegmentsSmall() throws Exception {
+    Directory dir = new RAMDirectory();
+
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+
+    writer.close();
+
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    conf.setMergePolicy(lmp);
+
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(1, sis.size());
+  }
+
+  public void testAllSegmentsLarge() throws Exception {
+    Directory dir = new RAMDirectory();
+
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+
+    writer.close();
+
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(2);
+    conf.setMergePolicy(lmp);
+
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(3, sis.size());
+  }
+
+  public void testOneLargeOneSmall() throws Exception {
+    Directory dir = new RAMDirectory();
+
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    addDocs(writer, 3);
+    addDocs(writer, 5);
+    addDocs(writer, 3);
+    addDocs(writer, 5);
+
+    writer.close();
+
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    conf.setMergePolicy(lmp);
+
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(4, sis.size());
+  }
+
+  public void testMergeFactor() throws Exception {
+    Directory dir = new RAMDirectory();
+
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 5);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+
+    writer.close();
+
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    lmp.setMergeFactor(2);
+    conf.setMergePolicy(lmp);
+
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+
+    // Should only be 4 segments in the index, because of the merge factor and
+    // max merge docs settings.
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(4, sis.size());
+  }
+
+  public void testSingleNonOptimizedSegment() throws Exception {
+    Directory dir = new RAMDirectory();
+
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    addDocs(writer, 3);
+    addDocs(writer, 5);
+    addDocs(writer, 3);
+
+    writer.close();
+
+    // delete the last document, so that the last segment is no longer
+    // optimized and gets re-merged by optimize().
+    IndexReader r = IndexReader.open(dir, false);
+    r.deleteDocument(r.numDocs() - 1);
+    r.close();
+
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    conf.setMergePolicy(lmp);
+
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+
+    // Verify that the last segment does not have deletions.
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(3, sis.size());
+    assertFalse(sis.info(2).hasDeletions());
+  }
+
+  public void testSingleOptimizedSegment() throws Exception {
+    Directory dir = new RAMDirectory();
+
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    addDocs(writer, 3);
+
+    writer.close();
+
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    conf.setMergePolicy(lmp);
+
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+
+    // Verify that the index is left with a single segment.
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(1, sis.size());
+  }
+
+  public void testSingleNonOptimizedTooLargeSegment() throws Exception {
+    Directory dir = new RAMDirectory();
+
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    addDocs(writer, 5);
+
+    writer.close();
+
+    // delete the last document
+    IndexReader r = IndexReader.open(dir, false);
+    r.deleteDocument(r.numDocs() - 1);
+    r.close();
+
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(2);
+    conf.setMergePolicy(lmp);
+
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+
+    // Verify that the segment was not merged: it was too large, so it still
+    // has deletions.
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(1, sis.size());
+    assertTrue(sis.info(0).hasDeletions());
+  }
+
+}