LUCENE-2701: port to trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1059751 13f79535-47bb-0310-9956-ffa450edef68
Shai Erera 2011-01-17 04:47:03 +00:00
parent 4249ef9644
commit 39fad978a0
5 changed files with 42 additions and 11 deletions

CHANGES.txt

@@ -507,9 +507,10 @@ Changes in runtime behavior
   usage, allowing applications to accidentally open two writers on the
   same directory.  (Mike McCandless)
 
-* LUCENE-2701: maxMergeMB and maxMergeDocs constraints set on LogMergePolicy now
-  affect optimize() as well (as opposed to only regular merges). This means that
-  you can run optimize() and too large segments won't be merged. (Shai Erera)
+* LUCENE-2701: maxMergeMBForOptimize and maxMergeDocs constraints set on
+  LogMergePolicy now affect optimize() as well (as opposed to only regular
+  merges). This means that you can run optimize() and too large segments won't
+  be merged. (Shai Erera)
 
 API Changes
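For illustration, a minimal usage sketch under the trunk API of this era (optimize() on IndexWriter, the IndexWriterConfig(Version, Analyzer) constructor); the example class name, the index path argument, and the Version.LUCENE_40 constant are assumptions, and setMaxMergeMBForOptimize is the setter introduced below:

    import java.io.File;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.LogByteSizeMergePolicy;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    // Hypothetical example class, not part of this commit.
    public class SizeBoundedOptimizeExample {
      public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File(args[0]));
        LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
        mp.setMaxMergeMBForOptimize(1024.0); // optimize() now skips segments of 1 GB or more
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40,
            new StandardAnalyzer(Version.LUCENE_40));
        conf.setMergePolicy(mp);
        IndexWriter writer = new IndexWriter(dir, conf);
        writer.optimize(); // merges the remaining smaller segments
        writer.close();
        dir.close();
      }
    }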

LogByteSizeMergePolicy.java

@@ -30,9 +30,14 @@ public class LogByteSizeMergePolicy extends LogMergePolicy {
    * or larger will never be merged.  @see setMaxMergeMB */
   public static final double DEFAULT_MAX_MERGE_MB = 2048;
 
+  /** Default maximum segment size.  A segment of this size
+   * or larger will never be merged during optimize.  @see setMaxMergeMBForOptimize */
+  public static final double DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE = Long.MAX_VALUE;
+
   public LogByteSizeMergePolicy() {
     minMergeSize = (long) (DEFAULT_MIN_MERGE_MB*1024*1024);
     maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB*1024*1024);
+    maxMergeSizeForOptimize = (long) (DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE*1024*1024);
   }
 
   @Override
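A note on the default above: DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE is Long.MAX_VALUE "MB", and the constructor's (long) cast of the oversized product saturates at Long.MAX_VALUE under Java's narrowing rules (JLS 5.1.3), so the default really does mean "no size limit". A minimal check:

    // Narrowing a too-large double to long clamps to Long.MAX_VALUE,
    // so the default disables the optimize() size cap rather than wrapping.
    double mb = Long.MAX_VALUE;              // DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE
    long bytes = (long) (mb * 1024 * 1024);  // product is far above the long range
    assert bytes == Long.MAX_VALUE;          // saturates: effectively unlimited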
@@ -63,6 +68,23 @@ public class LogByteSizeMergePolicy extends LogMergePolicy {
     return ((double) maxMergeSize)/1024/1024;
   }
 
+  /** <p>Determines the largest segment (measured by total
+   *  byte size of the segment's files, in MB) that may be
+   *  merged with other segments during optimize. Setting
+   *  it low will leave the index with more than 1 segment,
+   *  even if {@link IndexWriter#optimize()} is called.*/
+  public void setMaxMergeMBForOptimize(double mb) {
+    maxMergeSizeForOptimize = (long) (mb*1024*1024);
+  }
+
+  /** Returns the largest segment (measured by total byte
+   *  size of the segment's files, in MB) that may be merged
+   *  with other segments during optimize.
+   *  @see #setMaxMergeMBForOptimize */
+  public double getMaxMergeMBForOptimize() {
+    return ((double) maxMergeSizeForOptimize)/1024/1024;
+  }
+
   /** Sets the minimum size for the lowest level segments.
    * Any segments below this size are considered to be on
    * the same level (even if they vary drastically in size)
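The new setter stores the limit in bytes and the getter divides back out, so whole-MB values round-trip exactly; a small sketch:

    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
    lmp.setMaxMergeMBForOptimize(512.0);            // stored as 512 * 1024 * 1024 = 536870912 bytes
    assert lmp.getMaxMergeMBForOptimize() == 512.0; // exact round trip for whole-MB values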

LogDocMergePolicy.java

@@ -31,9 +31,10 @@ public class LogDocMergePolicy extends LogMergePolicy {
   public LogDocMergePolicy() {
     minMergeSize = DEFAULT_MIN_MERGE_DOCS;
 
-    // maxMergeSize is never used by LogDocMergePolicy; set
+    // maxMergeSize(ForOptimize) are never used by LogDocMergePolicy; set
     // it to Long.MAX_VALUE to disable it
     maxMergeSize = Long.MAX_VALUE;
+    maxMergeSizeForOptimize = Long.MAX_VALUE;
   }
 
   @Override
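LogDocMergePolicy measures segment size in documents, not bytes, so both byte-size caps are disabled here; the inherited document-count limit is what now bounds optimize() for this policy. A sketch:

    LogDocMergePolicy mp = new LogDocMergePolicy();
    mp.setMaxMergeDocs(100000); // segments with more docs are skipped, now by optimize() too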

LogMergePolicy.java

@@ -63,6 +63,9 @@ public abstract class LogMergePolicy extends MergePolicy {
 
   protected long minMergeSize;
   protected long maxMergeSize;
+  // Although the core MPs set it explicitly, we must default in case someone
+  // out there wrote his own LMP ...
+  protected long maxMergeSizeForOptimize = Long.MAX_VALUE;
   protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
 
   protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
@@ -240,9 +243,9 @@ public abstract class LogMergePolicy extends MergePolicy {
     int start = last - 1;
     while (start >= 0) {
       SegmentInfo info = infos.info(start);
-      if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
+      if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) {
         if (verbose()) {
-          message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")");
+          message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSizeForOptimize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")");
         }
         // need to skip that segment + add a merge for the 'right' segments,
         // unless there is only 1 which is optimized.
@@ -326,9 +329,12 @@ public abstract class LogMergePolicy extends MergePolicy {
   }
 
   /** Returns the merges necessary to optimize the index.
-   *  This merge policy defines "optimized" to mean only one
-   *  segment in the index, where that segment has no
-   *  deletions pending nor separate norms, and it is in
+   *  This merge policy defines "optimized" to mean only the
+   *  requested number of segments is left in the index, and
+   *  respects the {@link #maxMergeSizeForOptimize} setting.
+   *  By default, and assuming {@code maxNumSegments=1}, only
+   *  one segment will be left in the index, where that segment
+   *  has no deletions pending nor separate norms, and it is in
    *  compound file format if the current useCompoundFile
    *  setting is true.  This method returns multiple merges
    *  (mergeFactor at a time) so the {@link MergeScheduler}
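Per the reworded javadoc, "optimized" is now relative to the requested segment count and the size caps; for example (a hedged sketch, reusing a writer configured as in the earlier example):

    // Leave at most 5 segments; any segment over maxMergeSizeForOptimize or
    // maxMergeDocs is skipped, so more than 5 may remain if some are too large.
    writer.optimize(5);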
@@ -382,7 +388,7 @@ public abstract class LogMergePolicy extends MergePolicy {
     boolean anyTooLarge = false;
     for (int i = 0; i < last; i++) {
       SegmentInfo info = infos.info(i);
-      if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
+      if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) {
         anyTooLarge = true;
         break;
       }
@@ -588,6 +594,7 @@ public abstract class LogMergePolicy extends MergePolicy {
     sb.append("minMergeSize=").append(minMergeSize).append(", ");
     sb.append("mergeFactor=").append(mergeFactor).append(", ");
     sb.append("maxMergeSize=").append(maxMergeSize).append(", ");
+    sb.append("maxMergeSizeForOptimize=").append(maxMergeSizeForOptimize).append(", ");
     sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
     sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
     sb.append("useCompoundFile=").append(useCompoundFile);

TestSizeBoundedOptimize.java

@@ -63,7 +63,7 @@ public class TestSizeBoundedOptimize extends LuceneTestCase {
     conf = newWriterConfig();
 
     LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
-    lmp.setMaxMergeMB((min + 1) / (1 << 20));
+    lmp.setMaxMergeMBForOptimize((min + 1) / (1 << 20));
     conf.setMergePolicy(lmp);
 
     writer = new IndexWriter(dir, conf);
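On the test's arithmetic: 1 << 20 is 1048576, the number of bytes per MB, mirroring the setter's mb*1024*1024 conversion back to bytes. A hedged restatement (min is assumed, from the surrounding test code not shown here, to be the smallest segment's size in bytes):

    double capMB = (min + 1) / (double) (1 << 20); // bytes -> MB; assumes min is a byte count
    lmp.setMaxMergeMBForOptimize(capMB);           // the policy converts back to bytes internally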