LUCENE-2701: port to trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1059751 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2011-01-17 04:47:03 +00:00
parent 4249ef9644
commit 39fad978a0
5 changed files with 42 additions and 11 deletions

View File

@ -507,9 +507,10 @@ Changes in runtime behavior
usage, allowing applications to accidentally open two writers on the
same directory. (Mike McCandless)
* LUCENE-2701: maxMergeMB and maxMergeDocs constraints set on LogMergePolicy now
affect optimize() as well (as opposed to only regular merges). This means that
you can run optimize() and too large segments won't be merged. (Shai Erera)
* LUCENE-2701: the maxMergeMBForOptimize (LogByteSizeMergePolicy) and
maxMergeDocs (LogMergePolicy) constraints now affect optimize() as well (as
opposed to only regular merges). This means that you can run optimize() and
segments that are too large won't be merged. (Shai Erera)
API Changes

View File

@ -30,9 +30,14 @@ public class LogByteSizeMergePolicy extends LogMergePolicy {
* or larger will never be merged. @see setMaxMergeMB */
public static final double DEFAULT_MAX_MERGE_MB = 2048;
/** Default maximum segment size (in MB) for merging during optimize.
 *  A segment larger than this size will not be merged during optimize.
 *  The default of {@code Long.MAX_VALUE} leaves no practical limit.
 *  @see #setMaxMergeMBForOptimize */
public static final double DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE = Long.MAX_VALUE;
public LogByteSizeMergePolicy() {
minMergeSize = (long) (DEFAULT_MIN_MERGE_MB*1024*1024);
maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB*1024*1024);
maxMergeSizeForOptimize = (long) (DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE*1024*1024);
}
@Override
@ -63,6 +68,23 @@ public class LogByteSizeMergePolicy extends LogMergePolicy {
return ((double) maxMergeSize)/1024/1024;
}
/** <p>Sets the upper bound, in MB, on the size of a segment (measured
 *  by the total byte size of the segment's files) that may be merged
 *  with other segments during optimize. A low value will leave the
 *  index with more than 1 segment, even if
 *  {@link IndexWriter#optimize()} is called.
 *  @param mb the limit in MB; it is stored internally as a byte count */
public void setMaxMergeMBForOptimize(double mb) {
  final double limitInBytes = mb * 1024 * 1024;
  maxMergeSizeForOptimize = (long) limitInBytes;
}
/** Reports, in MB, the upper bound on the size of a segment (measured
 *  by the total byte size of the segment's files) that may be merged
 *  with other segments during optimize.
 *  @return the stored byte limit converted back to MB
 *  @see #setMaxMergeMBForOptimize */
public double getMaxMergeMBForOptimize() {
  final double limitInBytes = maxMergeSizeForOptimize;
  return limitInBytes / 1024 / 1024;
}
/** Sets the minimum size for the lowest level segments.
* Any segments below this size are considered to be on
* the same level (even if they vary drastically in size)

View File

@ -31,9 +31,10 @@ public class LogDocMergePolicy extends LogMergePolicy {
/** Creates a policy that starts from the default minimum merge doc count. */
public LogDocMergePolicy() {
  // LogDocMergePolicy never uses maxMergeSize or maxMergeSizeForOptimize;
  // Long.MAX_VALUE disables both.
  maxMergeSize = maxMergeSizeForOptimize = Long.MAX_VALUE;
  minMergeSize = DEFAULT_MIN_MERGE_DOCS;
}
@Override

View File

@ -63,6 +63,9 @@ public abstract class LogMergePolicy extends MergePolicy {
protected long minMergeSize;
protected long maxMergeSize;
// Size limit used when choosing segments for optimize: a segment whose
// size(info) is greater than this value is treated as too large to merge.
// Although the core merge policies set it explicitly, we must default it
// here in case someone has written their own LogMergePolicy subclass.
protected long maxMergeSizeForOptimize = Long.MAX_VALUE;
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
@ -240,9 +243,9 @@ public abstract class LogMergePolicy extends MergePolicy {
int start = last - 1;
while (start >= 0) {
SegmentInfo info = infos.info(start);
if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) {
if (verbose()) {
message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")");
message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSizeForOptimize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")");
}
// need to skip that segment + add a merge for the 'right' segments,
// unless there is only 1 which is optimized.
@ -326,9 +329,12 @@ public abstract class LogMergePolicy extends MergePolicy {
}
/** Returns the merges necessary to optimize the index.
* This merge policy defines "optimized" to mean only one
* segment in the index, where that segment has no
* deletions pending nor separate norms, and it is in
* This merge policy defines "optimized" to mean only the
* requested number of segments is left in the index, and
* respects the {@link #maxMergeSizeForOptimize} setting.
* By default, and assuming {@code maxNumSegments=1}, only
* one segment will be left in the index, where that segment
* has no deletions pending nor separate norms, and it is in
* compound file format if the current useCompoundFile
* setting is true. This method returns multiple merges
* (mergeFactor at a time) so the {@link MergeScheduler}
@ -382,7 +388,7 @@ public abstract class LogMergePolicy extends MergePolicy {
boolean anyTooLarge = false;
for (int i = 0; i < last; i++) {
SegmentInfo info = infos.info(i);
if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) {
anyTooLarge = true;
break;
}
@ -588,6 +594,7 @@ public abstract class LogMergePolicy extends MergePolicy {
sb.append("minMergeSize=").append(minMergeSize).append(", ");
sb.append("mergeFactor=").append(mergeFactor).append(", ");
sb.append("maxMergeSize=").append(maxMergeSize).append(", ");
sb.append("maxMergeSizeForOptimize=").append(maxMergeSizeForOptimize).append(", ");
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
sb.append("useCompoundFile=").append(useCompoundFile);

View File

@ -63,7 +63,7 @@ public class TestSizeBoundedOptimize extends LuceneTestCase {
conf = newWriterConfig();
LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
lmp.setMaxMergeMB((min + 1) / (1 << 20));
lmp.setMaxMergeMBForOptimize((min + 1) / (1 << 20));
conf.setMergePolicy(lmp);
writer = new IndexWriter(dir, conf);