mirror of https://github.com/apache/lucene.git
LUCENE-4323: Added support for an absolute maximum CFS segment size (in MiB) to LogMergePolicy and TieredMergePolicy
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1376766 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a7baf1cd9f
commit
89f9beb738
|
@ -20,6 +20,10 @@ New Features
|
|||
search performance. This was from Han Jiang's 2012 Google Summer of
|
||||
Code project (Han Jiang, Adrien Grand, Robert Muir, Mike McCandless)
|
||||
|
||||
* LUCENE-4323: Added support for an absolute maximum CFS segment size
|
||||
(in MiB) to LogMergePolicy and TieredMergePolicy.
|
||||
(Alexey Lef via Uwe Schindler)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-4299: Added Terms.hasPositions() and Terms.hasOffsets().
|
||||
|
|
|
@ -64,6 +64,13 @@ public abstract class LogMergePolicy extends MergePolicy {
|
|||
* @see #setNoCFSRatio */
|
||||
public static final double DEFAULT_NO_CFS_RATIO = 0.1;
|
||||
|
||||
/** Default maxCFSSegmentSize value allows compound file
|
||||
* for a segment of any size. The actual file format is
|
||||
* still subject to noCFSRatio.
|
||||
* @see #setMaxCFSSegmentSizeMB(double)
|
||||
*/
|
||||
public static final long DEFAULT_MAX_CFS_SEGMENT_SIZE = Long.MAX_VALUE;
|
||||
|
||||
protected int mergeFactor = DEFAULT_MERGE_FACTOR;
|
||||
|
||||
protected long minMergeSize;
|
||||
|
@ -74,6 +81,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
|||
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
|
||||
|
||||
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
|
||||
protected long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE;
|
||||
|
||||
protected boolean calibrateSizeByDeletes = true;
|
||||
|
||||
|
@ -136,21 +144,21 @@ public abstract class LogMergePolicy extends MergePolicy {
|
|||
// Javadoc inherited
|
||||
@Override
|
||||
public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
|
||||
final boolean doCFS;
|
||||
|
||||
if (!useCompoundFile) {
|
||||
doCFS = false;
|
||||
} else if (noCFSRatio == 1.0) {
|
||||
doCFS = true;
|
||||
} else {
|
||||
long totalSize = 0;
|
||||
for (SegmentInfoPerCommit info : infos) {
|
||||
totalSize += size(info);
|
||||
}
|
||||
|
||||
doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
|
||||
if (!getUseCompoundFile()) {
|
||||
return false;
|
||||
}
|
||||
return doCFS;
|
||||
long mergedInfoSize = size(mergedInfo);
|
||||
if (mergedInfoSize > maxCFSSegmentSize) {
|
||||
return false;
|
||||
}
|
||||
if (getNoCFSRatio() >= 1.0) {
|
||||
return true;
|
||||
}
|
||||
long totalSize = 0;
|
||||
for (SegmentInfoPerCommit info : infos) {
|
||||
totalSize += size(info);
|
||||
}
|
||||
return mergedInfoSize <= getNoCFSRatio() * totalSize;
|
||||
}
|
||||
|
||||
/** Sets whether compound file format should be used for
|
||||
|
@ -674,9 +682,28 @@ public abstract class LogMergePolicy extends MergePolicy {
|
|||
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
|
||||
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
|
||||
sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
|
||||
sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
|
||||
sb.append("noCFSRatio=").append(noCFSRatio);
|
||||
sb.append("]");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
/** Returns the largest size allowed for a compound file segment */
|
||||
public final double getMaxCFSSegmentSizeMB() {
|
||||
return maxCFSSegmentSize/1024/1024.;
|
||||
}
|
||||
|
||||
/** If a merged segment will be more than this value,
|
||||
* leave the segment as
|
||||
* non-compound file even if compound file is enabled.
|
||||
* Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
|
||||
* to always use CFS regardless of merge size. */
|
||||
public final void setMaxCFSSegmentSizeMB(double v) {
|
||||
if (v < 0.0) {
|
||||
throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
|
||||
}
|
||||
v *= 1024 * 1024;
|
||||
this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -84,6 +84,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
private double forceMergeDeletesPctAllowed = 10.0;
|
||||
private boolean useCompoundFile = true;
|
||||
private double noCFSRatio = 0.1;
|
||||
private long maxCFSSegmentSize = Long.MAX_VALUE;
|
||||
private double reclaimDeletesWeight = 2.0;
|
||||
|
||||
/** Maximum number of segments to be merged at a time
|
||||
|
@ -127,7 +128,11 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
* sizes of to-be-merged segments (compensating for
|
||||
* percent deleted docs). Default is 5 GB. */
|
||||
public TieredMergePolicy setMaxMergedSegmentMB(double v) {
|
||||
maxMergedSegmentBytes = (long) (v*1024*1024);
|
||||
if (v < 0.0) {
|
||||
throw new IllegalArgumentException("maxMergedSegmentMB must be >=0 (got " + v + ")");
|
||||
}
|
||||
v *= 1024 * 1024;
|
||||
maxMergedSegmentBytes = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -162,7 +167,8 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
if (v <= 0.0) {
|
||||
throw new IllegalArgumentException("floorSegmentMB must be >= 0.0 (got " + v + ")");
|
||||
}
|
||||
floorSegmentBytes = (long) (v*1024*1024);
|
||||
v *= 1024 * 1024;
|
||||
floorSegmentBytes = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -602,21 +608,21 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
|
||||
@Override
|
||||
public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
|
||||
final boolean doCFS;
|
||||
|
||||
if (!useCompoundFile) {
|
||||
doCFS = false;
|
||||
} else if (noCFSRatio == 1.0) {
|
||||
doCFS = true;
|
||||
} else {
|
||||
long totalSize = 0;
|
||||
for (SegmentInfoPerCommit info : infos) {
|
||||
totalSize += size(info);
|
||||
}
|
||||
|
||||
doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
|
||||
if (!getUseCompoundFile()) {
|
||||
return false;
|
||||
}
|
||||
return doCFS;
|
||||
long mergedInfoSize = size(mergedInfo);
|
||||
if (mergedInfoSize > maxCFSSegmentSize) {
|
||||
return false;
|
||||
}
|
||||
if (getNoCFSRatio() >= 1.0) {
|
||||
return true;
|
||||
}
|
||||
long totalSize = 0;
|
||||
for (SegmentInfoPerCommit info : infos) {
|
||||
totalSize += size(info);
|
||||
}
|
||||
return mergedInfoSize <= getNoCFSRatio() * totalSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -629,7 +635,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
boolean hasDeletions = w.numDeletedDocs(info) > 0;
|
||||
return !hasDeletions &&
|
||||
info.info.dir == w.getDirectory() &&
|
||||
(info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
|
||||
(info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0 || maxCFSSegmentSize < Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
// Segment size in bytes, pro-rated by % deleted
|
||||
|
@ -664,7 +670,27 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
sb.append("forceMergeDeletesPctAllowed=").append(forceMergeDeletesPctAllowed).append(", ");
|
||||
sb.append("segmentsPerTier=").append(segsPerTier).append(", ");
|
||||
sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
|
||||
sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
|
||||
sb.append("noCFSRatio=").append(noCFSRatio);
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/** Returns the largest size allowed for a compound file segment */
|
||||
public final double getMaxCFSSegmentSizeMB() {
|
||||
return maxCFSSegmentSize/1024/1024.;
|
||||
}
|
||||
|
||||
/** If a merged segment will be more than this value,
|
||||
* leave the segment as
|
||||
* non-compound file even if compound file is enabled.
|
||||
* Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
|
||||
* to always use CFS regardless of merge size. */
|
||||
public final TieredMergePolicy setMaxCFSSegmentSizeMB(double v) {
|
||||
if (v < 0.0) {
|
||||
throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
|
||||
}
|
||||
v *= 1024 * 1024;
|
||||
this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -263,4 +263,31 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase {
|
|||
assertTrue(numSegments < mergeFactor);
|
||||
}
|
||||
}
|
||||
|
||||
private static final double EPSILON = 1E-14;
|
||||
|
||||
public void testSetters() {
|
||||
assertSetters(new LogByteSizeMergePolicy());
|
||||
assertSetters(new LogDocMergePolicy());
|
||||
}
|
||||
|
||||
private void assertSetters(LogMergePolicy lmp) {
|
||||
lmp.setMaxCFSSegmentSizeMB(2.0);
|
||||
assertEquals(2.0, lmp.getMaxCFSSegmentSizeMB(), EPSILON);
|
||||
|
||||
lmp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
|
||||
assertEquals(Long.MAX_VALUE/1024/1024., lmp.getMaxCFSSegmentSizeMB(), EPSILON*Long.MAX_VALUE);
|
||||
|
||||
lmp.setMaxCFSSegmentSizeMB(Long.MAX_VALUE/1024/1024.);
|
||||
assertEquals(Long.MAX_VALUE/1024/1024., lmp.getMaxCFSSegmentSizeMB(), EPSILON*Long.MAX_VALUE);
|
||||
|
||||
try {
|
||||
lmp.setMaxCFSSegmentSizeMB(-2.0);
|
||||
fail("Didn't throw IllegalArgumentException");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// pass
|
||||
}
|
||||
|
||||
// TODO: Add more checks for other non-double setters!
|
||||
}
|
||||
}
|
||||
|
|
|
@ -153,4 +153,60 @@ public class TestTieredMergePolicy extends LuceneTestCase {
|
|||
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private static final double EPSILON = 1E-14;
|
||||
|
||||
public void testSetters() {
|
||||
final TieredMergePolicy tmp = new TieredMergePolicy();
|
||||
|
||||
tmp.setMaxMergedSegmentMB(0.5);
|
||||
assertEquals(0.5, tmp.getMaxMergedSegmentMB(), EPSILON);
|
||||
|
||||
tmp.setMaxMergedSegmentMB(Double.POSITIVE_INFINITY);
|
||||
assertEquals(Long.MAX_VALUE/1024/1024., tmp.getMaxMergedSegmentMB(), EPSILON*Long.MAX_VALUE);
|
||||
|
||||
tmp.setMaxMergedSegmentMB(Long.MAX_VALUE/1024/1024.);
|
||||
assertEquals(Long.MAX_VALUE/1024/1024., tmp.getMaxMergedSegmentMB(), EPSILON*Long.MAX_VALUE);
|
||||
|
||||
try {
|
||||
tmp.setMaxMergedSegmentMB(-2.0);
|
||||
fail("Didn't throw IllegalArgumentException");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// pass
|
||||
}
|
||||
|
||||
tmp.setFloorSegmentMB(2.0);
|
||||
assertEquals(2.0, tmp.getFloorSegmentMB(), EPSILON);
|
||||
|
||||
tmp.setFloorSegmentMB(Double.POSITIVE_INFINITY);
|
||||
assertEquals(Long.MAX_VALUE/1024/1024., tmp.getFloorSegmentMB(), EPSILON*Long.MAX_VALUE);
|
||||
|
||||
tmp.setFloorSegmentMB(Long.MAX_VALUE/1024/1024.);
|
||||
assertEquals(Long.MAX_VALUE/1024/1024., tmp.getFloorSegmentMB(), EPSILON*Long.MAX_VALUE);
|
||||
|
||||
try {
|
||||
tmp.setFloorSegmentMB(-2.0);
|
||||
fail("Didn't throw IllegalArgumentException");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// pass
|
||||
}
|
||||
|
||||
tmp.setMaxCFSSegmentSizeMB(2.0);
|
||||
assertEquals(2.0, tmp.getMaxCFSSegmentSizeMB(), EPSILON);
|
||||
|
||||
tmp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
|
||||
assertEquals(Long.MAX_VALUE/1024/1024., tmp.getMaxCFSSegmentSizeMB(), EPSILON*Long.MAX_VALUE);
|
||||
|
||||
tmp.setMaxCFSSegmentSizeMB(Long.MAX_VALUE/1024/1024.);
|
||||
assertEquals(Long.MAX_VALUE/1024/1024., tmp.getMaxCFSSegmentSizeMB(), EPSILON*Long.MAX_VALUE);
|
||||
|
||||
try {
|
||||
tmp.setMaxCFSSegmentSizeMB(-2.0);
|
||||
fail("Didn't throw IllegalArgumentException");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// pass
|
||||
}
|
||||
|
||||
// TODO: Add more checks for other non-double setters!
|
||||
}
|
||||
}
|
||||
|
|
|
@ -765,6 +765,11 @@ public abstract class LuceneTestCase extends Assert {
|
|||
} else {
|
||||
logmp.setMergeFactor(_TestUtil.nextInt(r, 10, 50));
|
||||
}
|
||||
logmp.setUseCompoundFile(r.nextBoolean());
|
||||
logmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
|
||||
if (rarely()) {
|
||||
logmp.setMaxCFSSegmentSizeMB(0.2 + r.nextDouble() * 2.0);
|
||||
}
|
||||
return logmp;
|
||||
}
|
||||
|
||||
|
@ -791,6 +796,9 @@ public abstract class LuceneTestCase extends Assert {
|
|||
}
|
||||
tmp.setUseCompoundFile(r.nextBoolean());
|
||||
tmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
|
||||
if (rarely()) {
|
||||
tmp.setMaxCFSSegmentSizeMB(0.2 + r.nextDouble() * 2.0);
|
||||
}
|
||||
tmp.setReclaimDeletesWeight(r.nextDouble()*4);
|
||||
return tmp;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue