LUCENE-4323: Added support for an absolute maximum CFS segment size (in MiB) to LogMergePolicy and TieredMergePolicy

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1376766 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2012-08-23 23:15:11 +00:00
parent a7baf1cd9f
commit 89f9beb738
6 changed files with 180 additions and 32 deletions

View File

@ -20,6 +20,10 @@ New Features
search performance. This was from Han Jiang's 2012 Google Summer of
Code project (Han Jiang, Adrien Grand, Robert Muir, Mike McCandless)
* LUCENE-4323: Added support for an absolute maximum CFS segment size
(in MiB) to LogMergePolicy and TieredMergePolicy.
(Alexey Lef via Uwe Schindler)
API Changes
* LUCENE-4299: Added Terms.hasPositions() and Terms.hasOffsets().

View File

@ -64,6 +64,13 @@ public abstract class LogMergePolicy extends MergePolicy {
* @see #setNoCFSRatio */
public static final double DEFAULT_NO_CFS_RATIO = 0.1;
/** Default maxCFSSegmentSize value allows compound file
* for a segment of any size. The actual file format is
* still subject to noCFSRatio.
* @see #setMaxCFSSegmentSizeMB(double)
*/
public static final long DEFAULT_MAX_CFS_SEGMENT_SIZE = Long.MAX_VALUE;
protected int mergeFactor = DEFAULT_MERGE_FACTOR;
protected long minMergeSize;
@ -74,6 +81,7 @@ public abstract class LogMergePolicy extends MergePolicy {
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
protected long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE;
protected boolean calibrateSizeByDeletes = true;
@ -136,21 +144,21 @@ public abstract class LogMergePolicy extends MergePolicy {
// Javadoc inherited
@Override
public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
final boolean doCFS;
if (!useCompoundFile) {
doCFS = false;
} else if (noCFSRatio == 1.0) {
doCFS = true;
} else {
long totalSize = 0;
for (SegmentInfoPerCommit info : infos) {
totalSize += size(info);
}
doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
if (!getUseCompoundFile()) {
return false;
}
return doCFS;
long mergedInfoSize = size(mergedInfo);
if (mergedInfoSize > maxCFSSegmentSize) {
return false;
}
if (getNoCFSRatio() >= 1.0) {
return true;
}
long totalSize = 0;
for (SegmentInfoPerCommit info : infos) {
totalSize += size(info);
}
return mergedInfoSize <= getNoCFSRatio() * totalSize;
}
/** Sets whether compound file format should be used for
@ -674,9 +682,28 @@ public abstract class LogMergePolicy extends MergePolicy {
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
sb.append("noCFSRatio=").append(noCFSRatio);
sb.append("]");
return sb.toString();
}
/**
 * Returns the largest size (in MiB) allowed for a compound file segment.
 *
 * @return the configured limit converted from bytes to MiB
 * @see #setMaxCFSSegmentSizeMB(double)
 */
public final double getMaxCFSSegmentSizeMB() {
  // Divide in floating point throughout: an integer first division would
  // truncate up to 1023 bytes, reporting 0.0 for any limit below 1 KiB.
  return maxCFSSegmentSize / 1024.0 / 1024.0;
}
/** If a merged segment will be more than this value,
 *  leave the segment as
 *  non-compound file even if compound file is enabled.
 *  Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
 *  to always use CFS regardless of merge size.
 *
 *  @param v maximum compound-file segment size in MiB; values whose byte
 *         count exceeds Long.MAX_VALUE (including positive infinity) are
 *         clamped to Long.MAX_VALUE bytes
 *  @throws IllegalArgumentException if v is negative or NaN */
public final void setMaxCFSSegmentSizeMB(double v) {
  // NaN fails every ordered comparison, so "v < 0.0" alone would let it
  // through and the (long) cast below would silently turn it into 0 bytes.
  if (Double.isNaN(v) || v < 0.0) {
    throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
  }
  final double bytes = v * (1024 * 1024);
  this.maxCFSSegmentSize = (bytes > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) bytes;
}
}

View File

@ -84,6 +84,7 @@ public class TieredMergePolicy extends MergePolicy {
private double forceMergeDeletesPctAllowed = 10.0;
private boolean useCompoundFile = true;
private double noCFSRatio = 0.1;
private long maxCFSSegmentSize = Long.MAX_VALUE;
private double reclaimDeletesWeight = 2.0;
/** Maximum number of segments to be merged at a time
@ -127,7 +128,11 @@ public class TieredMergePolicy extends MergePolicy {
* sizes of to-be-merged segments (compensating for
* percent deleted docs). Default is 5 GB. */
public TieredMergePolicy setMaxMergedSegmentMB(double v) {
maxMergedSegmentBytes = (long) (v*1024*1024);
if (v < 0.0) {
throw new IllegalArgumentException("maxMergedSegmentMB must be >=0 (got " + v + ")");
}
v *= 1024 * 1024;
maxMergedSegmentBytes = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
return this;
}
@ -162,7 +167,8 @@ public class TieredMergePolicy extends MergePolicy {
if (v <= 0.0) {
throw new IllegalArgumentException("floorSegmentMB must be >= 0.0 (got " + v + ")");
}
floorSegmentBytes = (long) (v*1024*1024);
v *= 1024 * 1024;
floorSegmentBytes = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
return this;
}
@ -602,21 +608,21 @@ public class TieredMergePolicy extends MergePolicy {
@Override
public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
final boolean doCFS;
if (!useCompoundFile) {
doCFS = false;
} else if (noCFSRatio == 1.0) {
doCFS = true;
} else {
long totalSize = 0;
for (SegmentInfoPerCommit info : infos) {
totalSize += size(info);
}
doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
if (!getUseCompoundFile()) {
return false;
}
return doCFS;
long mergedInfoSize = size(mergedInfo);
if (mergedInfoSize > maxCFSSegmentSize) {
return false;
}
if (getNoCFSRatio() >= 1.0) {
return true;
}
long totalSize = 0;
for (SegmentInfoPerCommit info : infos) {
totalSize += size(info);
}
return mergedInfoSize <= getNoCFSRatio() * totalSize;
}
@Override
@ -629,7 +635,7 @@ public class TieredMergePolicy extends MergePolicy {
boolean hasDeletions = w.numDeletedDocs(info) > 0;
return !hasDeletions &&
info.info.dir == w.getDirectory() &&
(info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
(info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0 || maxCFSSegmentSize < Long.MAX_VALUE);
}
// Segment size in bytes, pro-rated by % deleted
@ -664,7 +670,27 @@ public class TieredMergePolicy extends MergePolicy {
sb.append("forceMergeDeletesPctAllowed=").append(forceMergeDeletesPctAllowed).append(", ");
sb.append("segmentsPerTier=").append(segsPerTier).append(", ");
sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
sb.append("noCFSRatio=").append(noCFSRatio);
return sb.toString();
}
/**
 * Returns the largest size (in MiB) allowed for a compound file segment.
 *
 * @return the configured limit converted from bytes to MiB
 * @see #setMaxCFSSegmentSizeMB(double)
 */
public final double getMaxCFSSegmentSizeMB() {
  // Divide in floating point throughout: an integer first division would
  // truncate up to 1023 bytes, reporting 0.0 for any limit below 1 KiB.
  return maxCFSSegmentSize / 1024.0 / 1024.0;
}
/** If a merged segment will be more than this value,
 *  leave the segment as
 *  non-compound file even if compound file is enabled.
 *  Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
 *  to always use CFS regardless of merge size.
 *
 *  @param v maximum compound-file segment size in MiB; values whose byte
 *         count exceeds Long.MAX_VALUE (including positive infinity) are
 *         clamped to Long.MAX_VALUE bytes
 *  @return this policy, so that setter calls can be chained
 *  @throws IllegalArgumentException if v is negative or NaN */
public final TieredMergePolicy setMaxCFSSegmentSizeMB(double v) {
  // NaN fails every ordered comparison, so "v < 0.0" alone would let it
  // through and the (long) cast below would silently turn it into 0 bytes.
  if (Double.isNaN(v) || v < 0.0) {
    throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
  }
  final double bytes = v * (1024 * 1024);
  this.maxCFSSegmentSize = (bytes > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) bytes;
  return this;
}
}

View File

@ -263,4 +263,31 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase {
assertTrue(numSegments < mergeFactor);
}
}
private static final double EPSILON = 1E-14;
/** Runs the shared setter checks against both LogMergePolicy implementations. */
public void testSetters() {
  final LogMergePolicy byteSizePolicy = new LogByteSizeMergePolicy();
  assertSetters(byteSizePolicy);

  final LogMergePolicy docCountPolicy = new LogDocMergePolicy();
  assertSetters(docCountPolicy);
}
/**
 * Checks the maxCFSSegmentSizeMB setter/getter pair of the given policy:
 * an ordinary value, positive infinity, the largest finite value, and
 * rejection of a negative value.
 */
private void assertSetters(LogMergePolicy lmp) {
  // Expected getter result once the limit has been set to its maximum,
  // and the (scaled) tolerance used when comparing against it.
  final double largestMB = Long.MAX_VALUE/1024/1024.;
  final double largestDelta = EPSILON*Long.MAX_VALUE;

  // ordinary value is reported back unchanged
  lmp.setMaxCFSSegmentSizeMB(2.0);
  assertEquals(2.0, lmp.getMaxCFSSegmentSizeMB(), EPSILON);

  // infinity is accepted and reported as the maximum
  lmp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
  assertEquals(largestMB, lmp.getMaxCFSSegmentSizeMB(), largestDelta);

  // the maximum itself is accepted as well
  lmp.setMaxCFSSegmentSizeMB(largestMB);
  assertEquals(largestMB, lmp.getMaxCFSSegmentSizeMB(), largestDelta);

  // negative sizes must be rejected
  try {
    lmp.setMaxCFSSegmentSizeMB(-2.0);
    fail("Didn't throw IllegalArgumentException");
  } catch (IllegalArgumentException expected) {
    // pass
  }

  // TODO: Add more checks for other non-double setters!
}
}

View File

@ -153,4 +153,60 @@ public class TestTieredMergePolicy extends LuceneTestCase {
dir.close();
}
private static final double EPSILON = 1E-14;
/**
 * Checks the MB-based setters of TieredMergePolicy (maxMergedSegmentMB,
 * floorSegmentMB, maxCFSSegmentSizeMB): each is tried with an ordinary
 * value, positive infinity, the largest finite value, and a negative
 * value that must be rejected.
 */
public void testSetters() {
  final TieredMergePolicy tmp = new TieredMergePolicy();

  // Expected getter result once a limit has been set to its maximum,
  // and the (scaled) tolerance used when comparing against it.
  final double largestMB = Long.MAX_VALUE/1024/1024.;
  final double largestDelta = EPSILON*Long.MAX_VALUE;

  // --- maxMergedSegmentMB ---
  tmp.setMaxMergedSegmentMB(0.5);
  assertEquals(0.5, tmp.getMaxMergedSegmentMB(), EPSILON);

  tmp.setMaxMergedSegmentMB(Double.POSITIVE_INFINITY);
  assertEquals(largestMB, tmp.getMaxMergedSegmentMB(), largestDelta);

  tmp.setMaxMergedSegmentMB(largestMB);
  assertEquals(largestMB, tmp.getMaxMergedSegmentMB(), largestDelta);

  try {
    tmp.setMaxMergedSegmentMB(-2.0);
    fail("Didn't throw IllegalArgumentException");
  } catch (IllegalArgumentException expected) {
    // pass
  }

  // --- floorSegmentMB ---
  tmp.setFloorSegmentMB(2.0);
  assertEquals(2.0, tmp.getFloorSegmentMB(), EPSILON);

  tmp.setFloorSegmentMB(Double.POSITIVE_INFINITY);
  assertEquals(largestMB, tmp.getFloorSegmentMB(), largestDelta);

  tmp.setFloorSegmentMB(largestMB);
  assertEquals(largestMB, tmp.getFloorSegmentMB(), largestDelta);

  try {
    tmp.setFloorSegmentMB(-2.0);
    fail("Didn't throw IllegalArgumentException");
  } catch (IllegalArgumentException expected) {
    // pass
  }

  // --- maxCFSSegmentSizeMB ---
  tmp.setMaxCFSSegmentSizeMB(2.0);
  assertEquals(2.0, tmp.getMaxCFSSegmentSizeMB(), EPSILON);

  tmp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
  assertEquals(largestMB, tmp.getMaxCFSSegmentSizeMB(), largestDelta);

  tmp.setMaxCFSSegmentSizeMB(largestMB);
  assertEquals(largestMB, tmp.getMaxCFSSegmentSizeMB(), largestDelta);

  try {
    tmp.setMaxCFSSegmentSizeMB(-2.0);
    fail("Didn't throw IllegalArgumentException");
  } catch (IllegalArgumentException expected) {
    // pass
  }

  // TODO: Add more checks for other non-double setters!
}
}

View File

@ -765,6 +765,11 @@ public abstract class LuceneTestCase extends Assert {
} else {
logmp.setMergeFactor(_TestUtil.nextInt(r, 10, 50));
}
logmp.setUseCompoundFile(r.nextBoolean());
logmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
if (rarely()) {
logmp.setMaxCFSSegmentSizeMB(0.2 + r.nextDouble() * 2.0);
}
return logmp;
}
@ -791,6 +796,9 @@ public abstract class LuceneTestCase extends Assert {
}
tmp.setUseCompoundFile(r.nextBoolean());
tmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
if (rarely()) {
tmp.setMaxCFSSegmentSizeMB(0.2 + r.nextDouble() * 2.0);
}
tmp.setReclaimDeletesWeight(r.nextDouble()*4);
return tmp;
}