mirror of https://github.com/apache/lucene.git
LUCENE-2701: port to trunk
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1059751 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4249ef9644
commit
39fad978a0
|
@ -507,9 +507,10 @@ Changes in runtime behavior
|
||||||
usage, allowing applications to accidentally open two writers on the
|
usage, allowing applications to accidentally open two writers on the
|
||||||
same directory. (Mike McCandless)
|
same directory. (Mike McCandless)
|
||||||
|
|
||||||
* LUCENE-2701: maxMergeMB and maxMergeDocs constraints set on LogMergePolicy now
|
* LUCENE-2701: maxMergeMBForOptimize and maxMergeDocs constraints set on
|
||||||
affect optimize() as well (as opposed to only regular merges). This means that
|
LogMergePolicy now affect optimize() as well (as opposed to only regular
|
||||||
you can run optimize() and too large segments won't be merged. (Shai Erera)
|
merges). This means that you can run optimize() and too large segments won't
|
||||||
|
be merged. (Shai Erera)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
|
|
|
@ -30,9 +30,14 @@ public class LogByteSizeMergePolicy extends LogMergePolicy {
|
||||||
* or larger will never be merged. @see setMaxMergeMB */
|
* or larger will never be merged. @see setMaxMergeMB */
|
||||||
public static final double DEFAULT_MAX_MERGE_MB = 2048;
|
public static final double DEFAULT_MAX_MERGE_MB = 2048;
|
||||||
|
|
||||||
|
/** Default maximum segment size. A segment of this size
|
||||||
|
* or larger will never be merged during optimize. @see #setMaxMergeMBForOptimize */
|
||||||
|
public static final double DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE = Long.MAX_VALUE;
|
||||||
|
|
||||||
public LogByteSizeMergePolicy() {
|
public LogByteSizeMergePolicy() {
|
||||||
minMergeSize = (long) (DEFAULT_MIN_MERGE_MB*1024*1024);
|
minMergeSize = (long) (DEFAULT_MIN_MERGE_MB*1024*1024);
|
||||||
maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB*1024*1024);
|
maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB*1024*1024);
|
||||||
|
maxMergeSizeForOptimize = (long) (DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE*1024*1024);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -63,6 +68,23 @@ public class LogByteSizeMergePolicy extends LogMergePolicy {
|
||||||
return ((double) maxMergeSize)/1024/1024;
|
return ((double) maxMergeSize)/1024/1024;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** <p>Determines the largest segment (measured by total
|
||||||
|
* byte size of the segment's files, in MB) that may be
|
||||||
|
* merged with other segments during optimize. Setting
|
||||||
|
* it low will leave the index with more than 1 segment,
|
||||||
|
* even if {@link IndexWriter#optimize()} is called.*/
|
||||||
|
public void setMaxMergeMBForOptimize(double mb) {
|
||||||
|
maxMergeSizeForOptimize = (long) (mb*1024*1024);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the largest segment (measured by total byte
|
||||||
|
* size of the segment's files, in MB) that may be merged
|
||||||
|
* with other segments during optimize.
|
||||||
|
* @see #setMaxMergeMBForOptimize */
|
||||||
|
public double getMaxMergeMBForOptimize() {
|
||||||
|
return ((double) maxMergeSizeForOptimize)/1024/1024;
|
||||||
|
}
|
||||||
|
|
||||||
/** Sets the minimum size for the lowest level segments.
|
/** Sets the minimum size for the lowest level segments.
|
||||||
* Any segments below this size are considered to be on
|
* Any segments below this size are considered to be on
|
||||||
* the same level (even if they vary drastically in size)
|
* the same level (even if they vary drastically in size)
|
||||||
|
|
|
@ -31,9 +31,10 @@ public class LogDocMergePolicy extends LogMergePolicy {
|
||||||
public LogDocMergePolicy() {
|
public LogDocMergePolicy() {
|
||||||
minMergeSize = DEFAULT_MIN_MERGE_DOCS;
|
minMergeSize = DEFAULT_MIN_MERGE_DOCS;
|
||||||
|
|
||||||
// maxMergeSize is never used by LogDocMergePolicy; set
|
// maxMergeSize(ForOptimize) are never used by LogDocMergePolicy; set
|
||||||
// it to Long.MAX_VALUE to disable it
|
// them to Long.MAX_VALUE to disable them
|
||||||
maxMergeSize = Long.MAX_VALUE;
|
maxMergeSize = Long.MAX_VALUE;
|
||||||
|
maxMergeSizeForOptimize = Long.MAX_VALUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -63,6 +63,9 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
|
|
||||||
protected long minMergeSize;
|
protected long minMergeSize;
|
||||||
protected long maxMergeSize;
|
protected long maxMergeSize;
|
||||||
|
// Although the core MPs set it explicitly, we must default in case someone
|
||||||
|
// out there wrote his own LMP ...
|
||||||
|
protected long maxMergeSizeForOptimize = Long.MAX_VALUE;
|
||||||
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
|
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
|
||||||
|
|
||||||
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
|
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
|
||||||
|
@ -240,9 +243,9 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
int start = last - 1;
|
int start = last - 1;
|
||||||
while (start >= 0) {
|
while (start >= 0) {
|
||||||
SegmentInfo info = infos.info(start);
|
SegmentInfo info = infos.info(start);
|
||||||
if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
|
if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) {
|
||||||
if (verbose()) {
|
if (verbose()) {
|
||||||
message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")");
|
message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSizeForOptimize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")");
|
||||||
}
|
}
|
||||||
// need to skip that segment + add a merge for the 'right' segments,
|
// need to skip that segment + add a merge for the 'right' segments,
|
||||||
// unless there is only 1 which is optimized.
|
// unless there is only 1 which is optimized.
|
||||||
|
@ -326,9 +329,12 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the merges necessary to optimize the index.
|
/** Returns the merges necessary to optimize the index.
|
||||||
* This merge policy defines "optimized" to mean only one
|
* This merge policy defines "optimized" to mean only the
|
||||||
* segment in the index, where that segment has no
|
* requested number of segments is left in the index, and
|
||||||
* deletions pending nor separate norms, and it is in
|
* respects the {@link #maxMergeSizeForOptimize} setting.
|
||||||
|
* By default, and assuming {@code maxNumSegments=1}, only
|
||||||
|
* one segment will be left in the index, where that segment
|
||||||
|
* has no deletions pending nor separate norms, and it is in
|
||||||
* compound file format if the current useCompoundFile
|
* compound file format if the current useCompoundFile
|
||||||
* setting is true. This method returns multiple merges
|
* setting is true. This method returns multiple merges
|
||||||
* (mergeFactor at a time) so the {@link MergeScheduler}
|
* (mergeFactor at a time) so the {@link MergeScheduler}
|
||||||
|
@ -382,7 +388,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
boolean anyTooLarge = false;
|
boolean anyTooLarge = false;
|
||||||
for (int i = 0; i < last; i++) {
|
for (int i = 0; i < last; i++) {
|
||||||
SegmentInfo info = infos.info(i);
|
SegmentInfo info = infos.info(i);
|
||||||
if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
|
if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) {
|
||||||
anyTooLarge = true;
|
anyTooLarge = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -588,6 +594,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
||||||
sb.append("minMergeSize=").append(minMergeSize).append(", ");
|
sb.append("minMergeSize=").append(minMergeSize).append(", ");
|
||||||
sb.append("mergeFactor=").append(mergeFactor).append(", ");
|
sb.append("mergeFactor=").append(mergeFactor).append(", ");
|
||||||
sb.append("maxMergeSize=").append(maxMergeSize).append(", ");
|
sb.append("maxMergeSize=").append(maxMergeSize).append(", ");
|
||||||
|
sb.append("maxMergeSizeForOptimize=").append(maxMergeSizeForOptimize).append(", ");
|
||||||
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
|
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
|
||||||
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
|
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
|
||||||
sb.append("useCompoundFile=").append(useCompoundFile);
|
sb.append("useCompoundFile=").append(useCompoundFile);
|
||||||
|
|
|
@ -63,7 +63,7 @@ public class TestSizeBoundedOptimize extends LuceneTestCase {
|
||||||
|
|
||||||
conf = newWriterConfig();
|
conf = newWriterConfig();
|
||||||
LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
|
LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
|
||||||
lmp.setMaxMergeMB((min + 1) / (1 << 20));
|
lmp.setMaxMergeMBForOptimize((min + 1) / (1 << 20));
|
||||||
conf.setMergePolicy(lmp);
|
conf.setMergePolicy(lmp);
|
||||||
|
|
||||||
writer = new IndexWriter(dir, conf);
|
writer = new IndexWriter(dir, conf);
|
||||||
|
|
Loading…
Reference in New Issue