LUCENE-8330: Detach IndexWriter from MergePolicy

This change introduces a new MergePolicy.MergeContext interface
that is easy to mock, and cuts all uses of IW inside MergePolicy
over to MergeContext. Since IW now implements MergeContext, the
cut-over is straightforward. This dramatically reduces the API
exposed to MP and allows efficient testing without relying on IW,
improving the coverage and testability of our MP implementations.
Simon Willnauer 2018-05-24 14:01:22 +02:00
parent 54a63d0d0c
commit c93f628317
20 changed files with 362 additions and 240 deletions

lucene/CHANGES.txt

@@ -119,6 +119,10 @@ Changes in Runtime Behavior

 * LUCENE-8309: Live docs are no longer backed by a FixedBitSet. (Adrien Grand)

+* LUCENE-8330: Detach IndexWriter from MergePolicy. MergePolicy now instead of
+  requiring IndexWriter as a hard dependency expects a MergeContext which
+  IndexWriter implements. (Simon Willnauer, Robert Muir, Dawid Weiss, Mike McCandless)
+
 New Features

 * LUCENE-8200: Allow doc-values to be updated atomically together
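
Aside: against the new API, a merge policy compiles purely against MergeContext.
A minimal sketch of a do-nothing policy (the class name is made up; the
signatures are the ones this patch introduces, mirroring NoMergePolicy below):

    import java.io.IOException;
    import java.util.Map;

    import org.apache.lucene.index.MergePolicy;
    import org.apache.lucene.index.MergeTrigger;
    import org.apache.lucene.index.SegmentCommitInfo;
    import org.apache.lucene.index.SegmentInfos;

    // A policy that never schedules a merge; note no IndexWriter import is needed.
    public class KeepEverythingMergePolicy extends MergePolicy {
      @Override
      public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos,
          MergeContext mergeContext) throws IOException {
        return null; // null means "no merges necessary"
      }

      @Override
      public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount,
          Map<SegmentCommitInfo,Boolean> segmentsToMerge, MergeContext mergeContext) throws IOException {
        return null;
      }

      @Override
      public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos,
          MergeContext mergeContext) throws IOException {
        return null;
      }
    }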

lucene/core/src/java/org/apache/lucene/index/FilterMergePolicy.java

@@ -41,31 +41,31 @@ public class FilterMergePolicy extends MergePolicy {
   }

   @Override
-  public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer)
+  public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, MergeContext mergeContext)
       throws IOException {
-    return in.findMerges(mergeTrigger, segmentInfos, writer);
+    return in.findMerges(mergeTrigger, segmentInfos, mergeContext);
   }

   @Override
   public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount,
-      Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer) throws IOException {
-    return in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, writer);
+      Map<SegmentCommitInfo,Boolean> segmentsToMerge, MergeContext mergeContext) throws IOException {
+    return in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, mergeContext);
   }

   @Override
-  public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
-    return in.findForcedDeletesMerges(segmentInfos, writer);
+  public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, MergeContext mergeContext) throws IOException {
+    return in.findForcedDeletesMerges(segmentInfos, mergeContext);
   }

   @Override
-  public boolean useCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo, IndexWriter writer)
+  public boolean useCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo, MergeContext mergeContext)
       throws IOException {
-    return in.useCompoundFile(infos, mergedInfo, writer);
+    return in.useCompoundFile(infos, mergedInfo, mergeContext);
   }

   @Override
-  protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException {
-    return in.size(info, writer);
+  protected long size(SegmentCommitInfo info, MergeContext context) throws IOException {
+    return in.size(info, context);
   }

   @Override

lucene/core/src/java/org/apache/lucene/index/IndexWriter.java

@@ -39,6 +39,7 @@ import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.IntPredicate;
 import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.codecs.Codec;
@@ -207,7 +208,8 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
  * referenced by the "front" of the index). For this, IndexFileDeleter
  * keeps track of the last non commit checkpoint.
  */
-public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
+public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
+    MergePolicy.MergeContext {

   /** Hard limit on maximum number of documents that may be added to the
    * index. If you try to add more than this you'll hit {@code IllegalArgumentException}. */
@@ -629,8 +631,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    * If the reader isn't being pooled, the segmentInfo's
    * delCount is returned.
    */
+  @Override
   public int numDeletedDocs(SegmentCommitInfo info) {
     ensureOpen(false);
+    validate(info);
     int delCount = info.getDelCount();

     final ReadersAndUpdates rld = getPooledInstance(info, false);
@@ -1089,6 +1093,11 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
     return directoryOrig;
   }

+  @Override
+  public InfoStream getInfoStream() {
+    return infoStream;
+  }
+
   /** Returns the analyzer used by this index. */
   public Analyzer getAnalyzer() {
     ensureOpen();
@@ -4587,26 +4596,16 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
     return segString(segmentInfos);
   }

-  /** Returns a string description of the specified
-   * segments, for debugging.
-   *
-   * @lucene.internal */
   synchronized String segString(Iterable<SegmentCommitInfo> infos) {
-    final StringBuilder buffer = new StringBuilder();
-    for(final SegmentCommitInfo info : infos) {
-      if (buffer.length() > 0) {
-        buffer.append(' ');
-      }
-      buffer.append(segString(info));
-    }
-    return buffer.toString();
+    return StreamSupport.stream(infos.spliterator(), false)
+        .map(this::segString).collect(Collectors.joining(" "));
   }

   /** Returns a string description of the specified
    * segment, for debugging.
    *
    * @lucene.internal */
-  synchronized String segString(SegmentCommitInfo info) {
+  private synchronized String segString(SegmentCommitInfo info) {
     return info.toString(numDeletedDocs(info) - info.getDelCount());
   }

@@ -5130,8 +5129,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    * @param info the segment to get the number of deletes for
    * @lucene.experimental
    */
+  @Override
   public final int numDeletesToMerge(SegmentCommitInfo info) throws IOException {
     ensureOpen(false);
+    validate(info);
     MergePolicy mergePolicy = config.getMergePolicy();
     final ReadersAndUpdates rld = getPooledInstance(info, false);
     int numDeletesToMerge;
@@ -5178,4 +5179,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
   protected boolean isEnableTestPoints() {
     return false;
   }
+
+  private void validate(SegmentCommitInfo info) {
+    if (info.info.dir != directoryOrig) {
+      throw new IllegalArgumentException("SegmentCommitInfo must be from the same directory");
+    }
+  }
 }
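
The segString rewrite above swaps a hand-rolled StringBuilder loop for the
standard stream idiom for joining an Iterable. The same pattern in isolation
(a self-contained sketch with made-up strings, not Lucene code):

    import java.util.Arrays;
    import java.util.stream.Collectors;
    import java.util.stream.StreamSupport;

    public class JoinDemo {
      public static void main(String[] args) {
        Iterable<String> names = Arrays.asList("_0(8.0.0):c100", "_1(8.0.0):c42");
        // Stream the Iterable, map each element to its description,
        // and join with a single space -- same shape as the new segString.
        String joined = StreamSupport.stream(names.spliterator(), false)
            .map(s -> s + "[demo]")
            .collect(Collectors.joining(" "));
        System.out.println(joined); // _0(8.0.0):c100[demo] _1(8.0.0):c42[demo]
      }
    }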

lucene/core/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java

@@ -44,8 +44,8 @@ public class LogByteSizeMergePolicy extends LogMergePolicy {
   }

   @Override
-  protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException {
-    return sizeBytes(info, writer);
+  protected long size(SegmentCommitInfo info, MergeContext mergeContext) throws IOException {
+    return sizeBytes(info, mergeContext);
   }

   /** <p>Determines the largest segment (measured by total

lucene/core/src/java/org/apache/lucene/index/LogDocMergePolicy.java

@@ -40,8 +40,8 @@ public class LogDocMergePolicy extends LogMergePolicy {
   }

   @Override
-  protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException {
-    return sizeDocs(info, writer);
+  protected long size(SegmentCommitInfo info, MergeContext mergeContext) throws IOException {
+    return sizeDocs(info, mergeContext);
   }

   /** Sets the minimum size for the lowest level segments.

lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java

@@ -19,6 +19,7 @@ package org.apache.lucene.index;

 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -96,20 +97,6 @@ public abstract class LogMergePolicy extends MergePolicy {
     super(DEFAULT_NO_CFS_RATIO, MergePolicy.DEFAULT_MAX_CFS_SEGMENT_SIZE);
   }

-  /** Returns true if {@code LMP} is enabled in {@link
-   * IndexWriter}'s {@code infoStream}. */
-  protected boolean verbose(IndexWriter writer) {
-    return writer != null && writer.infoStream.isEnabled("LMP");
-  }
-
-  /** Print a debug message to {@link IndexWriter}'s {@code
-   * infoStream}. */
-  protected void message(String message, IndexWriter writer) {
-    if (verbose(writer)) {
-      writer.infoStream.message("LMP", message);
-    }
-  }
-
   /** <p>Returns the number of segments that are merged at
    * once and also controls the total number of segments
    * allowed to accumulate in the index.</p> */
@@ -148,10 +135,10 @@ public abstract class LogMergePolicy extends MergePolicy {
    * SegmentCommitInfo}, pro-rated by percentage of
    * non-deleted documents if {@link
    * #setCalibrateSizeByDeletes} is set. */
-  protected long sizeDocs(SegmentCommitInfo info, IndexWriter writer) throws IOException {
+  protected long sizeDocs(SegmentCommitInfo info, MergeContext mergeContext) throws IOException {
     if (calibrateSizeByDeletes) {
-      int delCount = writer.numDeletesToMerge(info);
-      assert delCount <= info.info.maxDoc();
+      int delCount = mergeContext.numDeletesToMerge(info);
+      assert assertDelCount(delCount, info);
       return (info.info.maxDoc() - (long)delCount);
     } else {
       return info.info.maxDoc();
@@ -162,9 +149,9 @@ public abstract class LogMergePolicy extends MergePolicy {
    * SegmentCommitInfo}, pro-rated by percentage of
    * non-deleted documents if {@link
    * #setCalibrateSizeByDeletes} is set. */
-  protected long sizeBytes(SegmentCommitInfo info, IndexWriter writer) throws IOException {
+  protected long sizeBytes(SegmentCommitInfo info, MergeContext mergeContext) throws IOException {
     if (calibrateSizeByDeletes) {
-      return super.size(info, writer);
+      return super.size(info, mergeContext);
     }
     return info.sizeInBytes();
   }
@@ -172,7 +159,7 @@ public abstract class LogMergePolicy extends MergePolicy {
   /** Returns true if the number of segments eligible for
    * merging is less than or equal to the specified {@code
    * maxNumSegments}. */
-  protected boolean isMerged(SegmentInfos infos, int maxNumSegments, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer) throws IOException {
+  protected boolean isMerged(SegmentInfos infos, int maxNumSegments, Map<SegmentCommitInfo,Boolean> segmentsToMerge, MergeContext mergeContext) throws IOException {
     final int numSegments = infos.size();
     int numToMerge = 0;
     SegmentCommitInfo mergeInfo = null;
@@ -188,7 +175,7 @@ public abstract class LogMergePolicy extends MergePolicy {
     }

     return numToMerge <= maxNumSegments &&
-      (numToMerge != 1 || !segmentIsOriginal || isMerged(infos, mergeInfo, writer));
+      (numToMerge != 1 || !segmentIsOriginal || isMerged(infos, mergeInfo, mergeContext));
   }

   /**
@@ -200,20 +187,20 @@ public abstract class LogMergePolicy extends MergePolicy {
    * maxNumSegments} will remain, but &lt;= that number.
    */
   private MergeSpecification findForcedMergesSizeLimit(
-      SegmentInfos infos, int maxNumSegments, int last, IndexWriter writer) throws IOException {
+      SegmentInfos infos, int last, MergeContext mergeContext) throws IOException {
     MergeSpecification spec = new MergeSpecification();
     final List<SegmentCommitInfo> segments = infos.asList();

     int start = last - 1;
     while (start >= 0) {
       SegmentCommitInfo info = infos.info(start);
-      if (size(info, writer) > maxMergeSizeForForcedMerge || sizeDocs(info, writer) > maxMergeDocs) {
-        if (verbose(writer)) {
-          message("findForcedMergesSizeLimit: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSizeForForcedMerge + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")", writer);
+      if (size(info, mergeContext) > maxMergeSizeForForcedMerge || sizeDocs(info, mergeContext) > maxMergeDocs) {
+        if (verbose(mergeContext)) {
+          message("findForcedMergesSizeLimit: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSizeForForcedMerge + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")", mergeContext);
         }
         // need to skip that segment + add a merge for the 'right' segments,
         // unless there is only 1 which is merged.
-        if (last - start - 1 > 1 || (start != last - 1 && !isMerged(infos, infos.info(start + 1), writer))) {
+        if (last - start - 1 > 1 || (start != last - 1 && !isMerged(infos, infos.info(start + 1), mergeContext))) {
           // there is more than 1 segment to the right of
           // this one, or a mergeable single segment.
           spec.add(new OneMerge(segments.subList(start + 1, last)));
@@ -229,7 +216,7 @@ public abstract class LogMergePolicy extends MergePolicy {

     // Add any left-over segments, unless there is just 1
     // already fully merged
-    if (last > 0 && (++start + 1 < last || !isMerged(infos, infos.info(start), writer))) {
+    if (last > 0 && (++start + 1 < last || !isMerged(infos, infos.info(start), mergeContext))) {
       spec.add(new OneMerge(segments.subList(start, last)));
     }

@@ -241,7 +228,7 @@ public abstract class LogMergePolicy extends MergePolicy {
    * the returned merges only by the {@code maxNumSegments} parameter, and
    * guaranteed that exactly that number of segments will remain in the index.
    */
-  private MergeSpecification findForcedMergesMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last, IndexWriter writer) throws IOException {
+  private MergeSpecification findForcedMergesMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last, MergeContext mergeContext) throws IOException {
     MergeSpecification spec = new MergeSpecification();
     final List<SegmentCommitInfo> segments = infos.asList();

@@ -259,7 +246,7 @@ public abstract class LogMergePolicy extends MergePolicy {

       // Since we must merge down to 1 segment, the
       // choice is simple:
-      if (last > 1 || !isMerged(infos, infos.info(0), writer)) {
+      if (last > 1 || !isMerged(infos, infos.info(0), mergeContext)) {
         spec.add(new OneMerge(segments.subList(0, last)));
       }
     } else if (last > maxNumSegments) {
@@ -282,9 +269,9 @@ public abstract class LogMergePolicy extends MergePolicy {
       for(int i=0;i<last-finalMergeSize+1;i++) {
         long sumSize = 0;
         for(int j=0;j<finalMergeSize;j++) {
-          sumSize += size(infos.info(j+i), writer);
+          sumSize += size(infos.info(j+i), mergeContext);
         }
-        if (i == 0 || (sumSize < 2*size(infos.info(i-1), writer) && sumSize < bestSize)) {
+        if (i == 0 || (sumSize < 2*size(infos.info(i-1), mergeContext) && sumSize < bestSize)) {
           bestStart = i;
           bestSize = sumSize;
         }
@@ -308,18 +295,18 @@ public abstract class LogMergePolicy extends MergePolicy {
    * in use may make use of concurrency. */
   @Override
   public MergeSpecification findForcedMerges(SegmentInfos infos,
-      int maxNumSegments, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer) throws IOException {
+      int maxNumSegments, Map<SegmentCommitInfo,Boolean> segmentsToMerge, MergeContext mergeContext) throws IOException {

     assert maxNumSegments > 0;
-    if (verbose(writer)) {
-      message("findForcedMerges: maxNumSegs=" + maxNumSegments + " segsToMerge="+ segmentsToMerge, writer);
+    if (verbose(mergeContext)) {
+      message("findForcedMerges: maxNumSegs=" + maxNumSegments + " segsToMerge="+ segmentsToMerge, mergeContext);
     }

     // If the segments are already merged (e.g. there's only 1 segment), or
     // there are <maxNumSegments:.
-    if (isMerged(infos, maxNumSegments, segmentsToMerge, writer)) {
-      if (verbose(writer)) {
-        message("already merged; skip", writer);
+    if (isMerged(infos, maxNumSegments, segmentsToMerge, mergeContext)) {
+      if (verbose(mergeContext)) {
+        message("already merged; skip", mergeContext);
       }
       return null;
     }
@@ -337,16 +324,16 @@ public abstract class LogMergePolicy extends MergePolicy {
     }

     if (last == 0) {
-      if (verbose(writer)) {
-        message("last == 0; skip", writer);
+      if (verbose(mergeContext)) {
+        message("last == 0; skip", mergeContext);
       }
       return null;
     }

     // There is only one segment already, and it is merged
-    if (maxNumSegments == 1 && last == 1 && isMerged(infos, infos.info(0), writer)) {
-      if (verbose(writer)) {
-        message("already 1 seg; skip", writer);
+    if (maxNumSegments == 1 && last == 1 && isMerged(infos, infos.info(0), mergeContext)) {
+      if (verbose(mergeContext)) {
+        message("already 1 seg; skip", mergeContext);
       }
       return null;
     }
@@ -355,16 +342,16 @@ public abstract class LogMergePolicy extends MergePolicy {
     boolean anyTooLarge = false;
     for (int i = 0; i < last; i++) {
       SegmentCommitInfo info = infos.info(i);
-      if (size(info, writer) > maxMergeSizeForForcedMerge || sizeDocs(info, writer) > maxMergeDocs) {
+      if (size(info, mergeContext) > maxMergeSizeForForcedMerge || sizeDocs(info, mergeContext) > maxMergeDocs) {
         anyTooLarge = true;
         break;
       }
     }

     if (anyTooLarge) {
-      return findForcedMergesSizeLimit(infos, maxNumSegments, last, writer);
+      return findForcedMergesSizeLimit(infos, last, mergeContext);
     } else {
-      return findForcedMergesMaxNumSegments(infos, maxNumSegments, last, writer);
+      return findForcedMergesMaxNumSegments(infos, maxNumSegments, last, mergeContext);
     }
   }

@@ -374,32 +361,33 @@ public abstract class LogMergePolicy extends MergePolicy {
    * deletes, up to mergeFactor at a time.
    */
   @Override
-  public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer)
+  public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, MergeContext mergeContext)
       throws IOException {
     final List<SegmentCommitInfo> segments = segmentInfos.asList();
     final int numSegments = segments.size();

-    if (verbose(writer)) {
-      message("findForcedDeleteMerges: " + numSegments + " segments", writer);
+    if (verbose(mergeContext)) {
+      message("findForcedDeleteMerges: " + numSegments + " segments", mergeContext);
     }

     MergeSpecification spec = new MergeSpecification();
     int firstSegmentWithDeletions = -1;
-    assert writer != null;
+    assert mergeContext != null;
     for(int i=0;i<numSegments;i++) {
       final SegmentCommitInfo info = segmentInfos.info(i);
-      int delCount = writer.numDeletesToMerge(info);
+      int delCount = mergeContext.numDeletesToMerge(info);
+      assert assertDelCount(delCount, info);
       if (delCount > 0) {
-        if (verbose(writer)) {
-          message(" segment " + info.info.name + " has deletions", writer);
+        if (verbose(mergeContext)) {
+          message(" segment " + info.info.name + " has deletions", mergeContext);
         }
         if (firstSegmentWithDeletions == -1)
           firstSegmentWithDeletions = i;
         else if (i - firstSegmentWithDeletions == mergeFactor) {
           // We've seen mergeFactor segments in a row with
           // deletions, so force a merge now:
-          if (verbose(writer)) {
-            message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive", writer);
+          if (verbose(mergeContext)) {
+            message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive", mergeContext);
           }
           spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
           firstSegmentWithDeletions = i;
@@ -408,8 +396,8 @@ public abstract class LogMergePolicy extends MergePolicy {
         // End of a sequence of segments with deletions, so,
         // merge those past segments even if it's fewer than
         // mergeFactor segments
-        if (verbose(writer)) {
-          message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive", writer);
+        if (verbose(mergeContext)) {
+          message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive", mergeContext);
         }
         spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
         firstSegmentWithDeletions = -1;
@@ -417,8 +405,8 @@ public abstract class LogMergePolicy extends MergePolicy {
     }

     if (firstSegmentWithDeletions != -1) {
-      if (verbose(writer)) {
-        message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive", writer);
+      if (verbose(mergeContext)) {
+        message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive", mergeContext);
       }
       spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, numSegments)));
     }
@@ -450,11 +438,11 @@ public abstract class LogMergePolicy extends MergePolicy {
    * will return multiple merges, allowing the {@link
    * MergeScheduler} to use concurrency. */
   @Override
-  public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos, IndexWriter writer) throws IOException {
+  public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos, MergeContext mergeContext) throws IOException {

     final int numSegments = infos.size();
-    if (verbose(writer)) {
-      message("findMerges: " + numSegments + " segments", writer);
+    if (verbose(mergeContext)) {
+      message("findMerges: " + numSegments + " segments", mergeContext);
     }

     // Compute levels, which is just log (base mergeFactor)
@@ -462,11 +450,11 @@ public abstract class LogMergePolicy extends MergePolicy {
     final List<SegmentInfoAndLevel> levels = new ArrayList<>(numSegments);
     final float norm = (float) Math.log(mergeFactor);

-    final Set<SegmentCommitInfo> mergingSegments = writer.getMergingSegments();
+    final Set<SegmentCommitInfo> mergingSegments = mergeContext.getMergingSegments();

     for(int i=0;i<numSegments;i++) {
       final SegmentCommitInfo info = infos.info(i);
-      long size = size(info, writer);
+      long size = size(info, mergeContext);

       // Floor tiny segments
       if (size < 1) {
@@ -476,13 +464,13 @@ public abstract class LogMergePolicy extends MergePolicy {
       final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm);
       levels.add(infoLevel);

-      if (verbose(writer)) {
-        final long segBytes = sizeBytes(info, writer);
+      if (verbose(mergeContext)) {
+        final long segBytes = sizeBytes(info, mergeContext);
         String extra = mergingSegments.contains(info) ? " [merging]" : "";
         if (size >= maxMergeSize) {
           extra += " [skip: too large]";
         }
-        message("seg=" + writer.segString(info) + " level=" + infoLevel.level + " size=" + String.format(Locale.ROOT, "%.3f MB", segBytes/1024/1024.) + extra, writer);
+        message("seg=" + segString(mergeContext, Collections.singleton(info)) + " level=" + infoLevel.level + " size=" + String.format(Locale.ROOT, "%.3f MB", segBytes/1024/1024.) + extra, mergeContext);
       }
     }

@@ -538,8 +526,8 @@ public abstract class LogMergePolicy extends MergePolicy {
         }
         upto--;
       }
-      if (verbose(writer)) {
-        message(" level " + levelBottom + " to " + maxLevel + ": " + (1+upto-start) + " segments", writer);
+      if (verbose(mergeContext)) {
+        message(" level " + levelBottom + " to " + maxLevel + ": " + (1+upto-start) + " segments", mergeContext);
       }

       // Finally, record all merges that are viable at this level:
@@ -549,7 +537,7 @@ public abstract class LogMergePolicy extends MergePolicy {
         boolean anyMerging = false;
         for(int i=start;i<end;i++) {
           final SegmentCommitInfo info = levels.get(i).info;
-          anyTooLarge |= (size(info, writer) >= maxMergeSize || sizeDocs(info, writer) >= maxMergeDocs);
+          anyTooLarge |= (size(info, mergeContext) >= maxMergeSize || sizeDocs(info, mergeContext) >= maxMergeDocs);
           if (mergingSegments.contains(info)) {
             anyMerging = true;
             break;
@@ -566,12 +554,12 @@ public abstract class LogMergePolicy extends MergePolicy {
             mergeInfos.add(levels.get(i).info);
             assert infos.contains(levels.get(i).info);
           }
-          if (verbose(writer)) {
-            message(" add merge=" + writer.segString(mergeInfos) + " start=" + start + " end=" + end, writer);
+          if (verbose(mergeContext)) {
+            message(" add merge=" + segString(mergeContext, mergeInfos) + " start=" + start + " end=" + end, mergeContext);
          }
          spec.add(new OneMerge(mergeInfos));
-        } else if (verbose(writer)) {
-          message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping", writer);
+        } else if (verbose(mergeContext)) {
+          message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping", mergeContext);
         }

         start = end;
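
To make the calibrateSizeByDeletes arithmetic in sizeDocs/sizeBytes above
concrete (the numbers are made up): for a segment with maxDoc = 1000 where
numDeletesToMerge reports 250 reclaimable deletes,

    sizeDocs:  1000 - 250                   = 750 effective documents
    size:      100 MB * (1.0 - 250/1000.0)  = 75 MB effective bytes

so segments carrying many deletes sort as if they were smaller, which makes
them more attractive merge candidates.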

lucene/core/src/java/org/apache/lucene/index/MergePolicy.java

@@ -29,12 +29,14 @@ import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.function.BooleanSupplier;
 import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;

 import org.apache.lucene.document.Field;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MergeInfo;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.IOSupplier;
+import org.apache.lucene.util.InfoStream;

 /**
  * <p>Expert: a MergePolicy determines the sequence of
@@ -50,7 +52,7 @@ import org.apache.lucene.util.IOSupplier;
  * {@link MergeSpecification} instance describing the set of
  * merges that should be done, or null if no merges are
  * necessary. When IndexWriter.forceMerge is called, it calls
- * {@link #findForcedMerges(SegmentInfos,int,Map, IndexWriter)} and the MergePolicy should
+ * {@link #findForcedMerges(SegmentInfos, int, Map, MergeContext)} and the MergePolicy should
  * then return the necessary merges.</p>
  *
  * <p>Note that the policy can return more than one merge at
@@ -65,6 +67,7 @@ import org.apache.lucene.util.IOSupplier;
  * @lucene.experimental
  */
 public abstract class MergePolicy {
+
   /**
    * Progress and state for an executing merge. This class
    * encapsulates the logic to pause and resume the merge thread
@@ -483,9 +486,9 @@ public abstract class MergePolicy {
    * @param mergeTrigger the event that triggered the merge
    * @param segmentInfos
    *          the total set of segments in the index
-   * @param writer the IndexWriter to find the merges on
+   * @param mergeContext the IndexWriter to find the merges on
    */
-  public abstract MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer)
+  public abstract MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, MergeContext mergeContext)
       throws IOException;

   /**
@@ -494,36 +497,34 @@ public abstract class MergePolicy {
    * {@link IndexWriter#forceMerge} method is called. This call is always
    * synchronized on the {@link IndexWriter} instance so only one thread at a
    * time will call this method.
-   *
-   * @param segmentInfos
+   * @param segmentInfos
    *          the total set of segments in the index
    * @param maxSegmentCount
    *          requested maximum number of segments in the index (currently this
    *          is always 1)
    * @param segmentsToMerge
    *          contains the specific SegmentInfo instances that must be merged
    *          away. This may be a subset of all
    *          SegmentInfos. If the value is True for a
    *          given SegmentInfo, that means this segment was
    *          an original segment present in the
    *          to-be-merged index; else, it was a segment
    *          produced by a cascaded merge.
-   * @param writer the IndexWriter to find the merges on
+   * @param mergeContext the IndexWriter to find the merges on
    */
   public abstract MergeSpecification findForcedMerges(
-      SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer)
+      SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, MergeContext mergeContext)
       throws IOException;

   /**
    * Determine what set of merge operations is necessary in order to expunge all
    * deletes from the index.
-   *
-   * @param segmentInfos
+   * @param segmentInfos
    *          the total set of segments in the index
-   * @param writer the IndexWriter to find the merges on
+   * @param mergeContext the IndexWriter to find the merges on
    */
   public abstract MergeSpecification findForcedDeletesMerges(
-      SegmentInfos segmentInfos, IndexWriter writer) throws IOException;
+      SegmentInfos segmentInfos, MergeContext mergeContext) throws IOException;

   /**
    * Returns true if a new segment (regardless of its origin) should use the
@@ -532,11 +533,11 @@ public abstract class MergePolicy {
    * {@link #getMaxCFSSegmentSizeMB()} and the size is less or equal to the
    * TotalIndexSize * {@link #getNoCFSRatio()} otherwise <code>false</code>.
    */
-  public boolean useCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo, IndexWriter writer) throws IOException {
+  public boolean useCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo, MergeContext mergeContext) throws IOException {
     if (getNoCFSRatio() == 0.0) {
       return false;
     }
-    long mergedInfoSize = size(mergedInfo, writer);
+    long mergedInfoSize = size(mergedInfo, mergeContext);
     if (mergedInfoSize > maxCFSSegmentSize) {
       return false;
     }
@@ -545,7 +546,7 @@ public abstract class MergePolicy {
     }
     long totalSize = 0;
     for (SegmentCommitInfo info : infos) {
-      totalSize += size(info, writer);
+      totalSize += size(info, mergeContext);
     }
     return mergedInfoSize <= getNoCFSRatio() * totalSize;
   }

@@ -553,23 +554,34 @@ public abstract class MergePolicy {
   /** Return the byte size of the provided {@link
    * SegmentCommitInfo}, pro-rated by percentage of
    * non-deleted documents is set. */
-  protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException {
+  protected long size(SegmentCommitInfo info, MergeContext mergeContext) throws IOException {
     long byteSize = info.sizeInBytes();
-    int delCount = writer.numDeletesToMerge(info);
+    int delCount = mergeContext.numDeletesToMerge(info);
+    assert assertDelCount(delCount, info);
     double delRatio = info.info.maxDoc() <= 0 ? 0.0f : (float) delCount / (float) info.info.maxDoc();
     assert delRatio <= 1.0;
     return (info.info.maxDoc() <= 0 ? byteSize : (long) (byteSize * (1.0 - delRatio)));
   }

+  /**
+   * Asserts that the delCount for this SegmentCommitInfo is valid
+   */
+  protected final boolean assertDelCount(int delCount, SegmentCommitInfo info) {
+    assert delCount >= 0: "delCount must be positive: " + delCount;
+    assert delCount <= info.info.maxDoc() : "delCount: " + delCount
+        + " must be leq than maxDoc: " + info.info.maxDoc();
+    return true;
+  }
+
   /** Returns true if this single info is already fully merged (has no
    * pending deletes, is in the same dir as the
    * writer, and matches the current compound file setting */
-  protected final boolean isMerged(SegmentInfos infos, SegmentCommitInfo info, IndexWriter writer) throws IOException {
-    assert writer != null;
-    boolean hasDeletions = writer.numDeletesToMerge(info) > 0;
-    return !hasDeletions &&
-      info.info.dir == writer.getDirectory() &&
-      useCompoundFile(infos, info, writer) == info.info.getUseCompoundFile();
+  protected final boolean isMerged(SegmentInfos infos, SegmentCommitInfo info, MergeContext mergeContext) throws IOException {
+    assert mergeContext != null;
+    int delCount = mergeContext.numDeletesToMerge(info);
+    assert assertDelCount(delCount, info);
+    return delCount == 0 &&
+      useCompoundFile(infos, info, mergeContext) == info.info.getUseCompoundFile();
   }

   /** Returns current {@code noCFSRatio}.
@@ -633,4 +645,61 @@ public abstract class MergePolicy {
       IOSupplier<CodecReader> readerSupplier) throws IOException {
     return info.getDelCount() + pendingDeleteCount;
   }
+
+  /**
+   * Builds a String representation of the given SegmentCommitInfo instances
+   */
+  protected final String segString(MergeContext mergeContext, Iterable<SegmentCommitInfo> infos) {
+    return StreamSupport.stream(infos.spliterator(), false)
+        .map(info -> info.toString(mergeContext.numDeletedDocs(info) - info.getDelCount()))
+        .collect(Collectors.joining(" "));
+  }
+
+  /** Print a debug message to {@link MergeContext}'s {@code
+   * infoStream}. */
+  protected final void message(String message, MergeContext mergeContext) {
+    if (verbose(mergeContext)) {
+      mergeContext.getInfoStream().message("MP", message);
+    }
+  }
+
+  /**
+   * Returns <code>true</code> if the info-stream is in verbose mode
+   * @see #message(String, MergeContext)
+   */
+  protected final boolean verbose(MergeContext mergeContext) {
+    return mergeContext.getInfoStream().isEnabled("MP");
+  }
+
+  /**
+   * This interface represents the current context of the merge selection process.
+   * It allows to access real-time information like the currently merging segments or
+   * how many deletes a segment would claim back if merged. This context might be stateful
+   * and change during the execution of a merge policy's selection processes.
+   * @lucene.experimental
+   */
+  public interface MergeContext {
+
+    /**
+     * Returns the number of deletes a merge would claim back if the given segment is merged.
+     * @see MergePolicy#numDeletesToMerge(SegmentCommitInfo, int, org.apache.lucene.util.IOSupplier)
+     * @param info the segment to get the number of deletes for
+     */
+    int numDeletesToMerge(SegmentCommitInfo info) throws IOException;
+
+    /**
+     * Returns the number of deleted documents in the given segments.
+     */
+    int numDeletedDocs(SegmentCommitInfo info);
+
+    /**
+     * Returns the info stream that can be used to log messages
+     */
+    InfoStream getInfoStream();
+
+    /**
+     * Returns an unmodifiable set of segments that are currently merging.
+     */
+    Set<SegmentCommitInfo> getMergingSegments();
+  }
 }
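
Because MergeContext is this small, a unit test can drive a merge policy
without standing up an IndexWriter. A hand-rolled stub is a few lines (a
sketch for illustration; the class name is made up, and it only uses the
interface methods shown above):

    import java.util.Collections;
    import java.util.Set;

    import org.apache.lucene.index.MergePolicy;
    import org.apache.lucene.index.SegmentCommitInfo;
    import org.apache.lucene.util.InfoStream;

    // A stub context: no deletes, nothing currently merging, logging disabled.
    public class StubMergeContext implements MergePolicy.MergeContext {
      @Override
      public int numDeletesToMerge(SegmentCommitInfo info) {
        return 0; // pretend a merge would reclaim nothing
      }

      @Override
      public int numDeletedDocs(SegmentCommitInfo info) {
        return 0;
      }

      @Override
      public InfoStream getInfoStream() {
        return InfoStream.NO_OUTPUT; // keeps verbose(...) false in the policies
      }

      @Override
      public Set<SegmentCommitInfo> getMergingSegments() {
        return Collections.emptySet();
      }
    }

A policy's findMerges/findForcedMerges can then be exercised directly against
a hand-built SegmentInfos, which is what makes the policies mockable and
testable without an IndexWriter.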

lucene/core/src/java/org/apache/lucene/index/MergeTrigger.java

@@ -19,7 +19,7 @@ package org.apache.lucene.index;

 /**
  * MergeTrigger is passed to
- * {@link org.apache.lucene.index.MergePolicy#findMerges(MergeTrigger, org.apache.lucene.index.SegmentInfos, IndexWriter)} to indicate the
+ * {@link MergePolicy#findMerges(MergeTrigger, SegmentInfos, MergePolicy.MergeContext)} to indicate the
  * event that triggered the merge.
  */
 public enum MergeTrigger {

lucene/core/src/java/org/apache/lucene/index/NoMergePolicy.java

@@ -36,22 +36,22 @@ public final class NoMergePolicy extends MergePolicy {
   }

   @Override
-  public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer) { return null; }
+  public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, MergeContext mergeContext) { return null; }

   @Override
   public MergeSpecification findForcedMerges(SegmentInfos segmentInfos,
-      int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer) { return null; }
+      int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, MergeContext mergeContext) { return null; }

   @Override
-  public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer) { return null; }
+  public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, MergeContext mergeContext) { return null; }

   @Override
-  public boolean useCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment, IndexWriter writer) {
+  public boolean useCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment, MergeContext mergeContext) {
     return newSegment.info.getUseCompoundFile();
   }

   @Override
-  protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException {
+  protected long size(SegmentCommitInfo info, MergeContext context) throws IOException {
     return Long.MAX_VALUE;
   }

lucene/core/src/java/org/apache/lucene/index/OneMergeWrappingMergePolicy.java

@@ -42,21 +42,21 @@ public class OneMergeWrappingMergePolicy extends FilterMergePolicy {
   }

   @Override
-  public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer)
+  public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, MergeContext mergeContext)
       throws IOException {
-    return wrapSpec(in.findMerges(mergeTrigger, segmentInfos, writer));
+    return wrapSpec(in.findMerges(mergeTrigger, segmentInfos, mergeContext));
   }

   @Override
   public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount,
-      Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer) throws IOException {
-    return wrapSpec(in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, writer));
+      Map<SegmentCommitInfo,Boolean> segmentsToMerge, MergeContext mergeContext) throws IOException {
+    return wrapSpec(in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, mergeContext));
   }

   @Override
-  public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer)
+  public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, MergeContext mergeContext)
       throws IOException {
-    return wrapSpec(in.findForcedDeletesMerges(segmentInfos, writer));
+    return wrapSpec(in.findForcedDeletesMerges(segmentInfos, mergeContext));
   }

   private MergeSpecification wrapSpec(MergeSpecification spec) {

lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java

@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
@ -272,23 +273,23 @@ public class TieredMergePolicy extends MergePolicy {
abstract String getExplanation(); abstract String getExplanation();
} }
private Map<SegmentCommitInfo,Long> getSegmentSizes(IndexWriter writer, Collection<SegmentCommitInfo> infos) throws IOException { private Map<SegmentCommitInfo,Long> getSegmentSizes(MergeContext mergeContext, Collection<SegmentCommitInfo> infos) throws IOException {
Map<SegmentCommitInfo,Long> sizeInBytes = new HashMap<>(); Map<SegmentCommitInfo,Long> sizeInBytes = new HashMap<>();
for (SegmentCommitInfo info : infos) { for (SegmentCommitInfo info : infos) {
sizeInBytes.put(info, size(info, writer)); sizeInBytes.put(info, size(info, mergeContext));
} }
return sizeInBytes; return sizeInBytes;
} }
@Override @Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos, IndexWriter writer) throws IOException { public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos, MergeContext mergeContext) throws IOException {
if (verbose(writer)) { if (verbose(mergeContext)) {
message("findMerges: " + infos.size() + " segments", writer); message("findMerges: " + infos.size() + " segments", mergeContext);
} }
if (infos.size() == 0) { if (infos.size() == 0) {
return null; return null;
} }
final Set<SegmentCommitInfo> merging = writer.getMergingSegments(); final Set<SegmentCommitInfo> merging = mergeContext.getMergingSegments();
final Set<SegmentCommitInfo> toBeMerged = new HashSet<>(); final Set<SegmentCommitInfo> toBeMerged = new HashSet<>();
final List<SegmentCommitInfo> infosSorted = new ArrayList<>(infos.asList()); final List<SegmentCommitInfo> infosSorted = new ArrayList<>(infos.asList());
@ -296,7 +297,7 @@ public class TieredMergePolicy extends MergePolicy {
// The size can change concurrently while we are running here, because deletes // The size can change concurrently while we are running here, because deletes
// are now applied concurrently, and this can piss off TimSort! So we // are now applied concurrently, and this can piss off TimSort! So we
// call size() once per segment and sort by that: // call size() once per segment and sort by that:
Map<SegmentCommitInfo,Long> sizeInBytes = getSegmentSizes(writer, infos.asList()); Map<SegmentCommitInfo,Long> sizeInBytes = getSegmentSizes(mergeContext, infos.asList());
infosSorted.sort(new SegmentByteSizeDescending(sizeInBytes)); infosSorted.sort(new SegmentByteSizeDescending(sizeInBytes));
@ -305,14 +306,14 @@ public class TieredMergePolicy extends MergePolicy {
long minSegmentBytes = Long.MAX_VALUE; long minSegmentBytes = Long.MAX_VALUE;
for(SegmentCommitInfo info : infosSorted) { for(SegmentCommitInfo info : infosSorted) {
final long segBytes = sizeInBytes.get(info); final long segBytes = sizeInBytes.get(info);
if (verbose(writer)) { if (verbose(mergeContext)) {
String extra = merging.contains(info) ? " [merging]" : ""; String extra = merging.contains(info) ? " [merging]" : "";
if (segBytes >= maxMergedSegmentBytes/2.0) { if (segBytes >= maxMergedSegmentBytes/2.0) {
extra += " [skip: too large]"; extra += " [skip: too large]";
} else if (segBytes < floorSegmentBytes) { } else if (segBytes < floorSegmentBytes) {
extra += " [floored]"; extra += " [floored]";
} }
message(" seg=" + writer.segString(info) + " size=" + String.format(Locale.ROOT, "%.3f", segBytes/1024/1024.) + " MB" + extra, writer); message(" seg=" + segString(mergeContext, Collections.singleton(info)) + " size=" + String.format(Locale.ROOT, "%.3f", segBytes/1024/1024.) + " MB" + extra, mergeContext);
} }
minSegmentBytes = Math.min(segBytes, minSegmentBytes); minSegmentBytes = Math.min(segBytes, minSegmentBytes);
@ -372,8 +373,8 @@ public class TieredMergePolicy extends MergePolicy {
final boolean maxMergeIsRunning = mergingBytes >= maxMergedSegmentBytes; final boolean maxMergeIsRunning = mergingBytes >= maxMergedSegmentBytes;
if (verbose(writer)) { if (verbose(mergeContext)) {
message(" allowedSegmentCount=" + allowedSegCountInt + " vs count=" + infosSorted.size() + " (eligible count=" + eligible.size() + ") tooBigCount=" + tooBigCount, writer); message(" allowedSegmentCount=" + allowedSegCountInt + " vs count=" + infosSorted.size() + " (eligible count=" + eligible.size() + ") tooBigCount=" + tooBigCount, mergeContext);
} }
if (eligible.size() == 0) { if (eligible.size() == 0) {
@ -417,9 +418,9 @@ public class TieredMergePolicy extends MergePolicy {
// segments, and already pre-excluded the too-large segments: // segments, and already pre-excluded the too-large segments:
assert candidate.size() > 0; assert candidate.size() > 0;
final MergeScore score = score(candidate, hitTooLarge, mergingBytes, writer, sizeInBytes); final MergeScore score = score(candidate, hitTooLarge, sizeInBytes);
if (verbose(writer)) { if (verbose(mergeContext)) {
message(" maybe=" + writer.segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes/1024./1024.), writer); message(" maybe=" + segString(mergeContext, candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes/1024./1024.), mergeContext);
} }
// If we are already running a max sized merge // If we are already running a max sized merge
@ -441,8 +442,8 @@ public class TieredMergePolicy extends MergePolicy {
spec.add(merge); spec.add(merge);
toBeMerged.addAll(merge.segments); toBeMerged.addAll(merge.segments);
if (verbose(writer)) { if (verbose(mergeContext)) {
message(" add merge=" + writer.segString(merge.segments) + " size=" + String.format(Locale.ROOT, "%.3f MB", bestMergeBytes/1024./1024.) + " score=" + String.format(Locale.ROOT, "%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : ""), writer); message(" add merge=" + segString(mergeContext, merge.segments) + " size=" + String.format(Locale.ROOT, "%.3f MB", bestMergeBytes/1024./1024.) + " score=" + String.format(Locale.ROOT, "%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : ""), mergeContext);
} }
} else { } else {
return spec; return spec;
@ -454,7 +455,7 @@ public class TieredMergePolicy extends MergePolicy {
} }
/** Expert: scores one merge; subclasses can override. */ /** Expert: scores one merge; subclasses can override. */
protected MergeScore score(List<SegmentCommitInfo> candidate, boolean hitTooLarge, long mergingBytes, IndexWriter writer, Map<SegmentCommitInfo, Long> sizeInBytes) throws IOException { protected MergeScore score(List<SegmentCommitInfo> candidate, boolean hitTooLarge, Map<SegmentCommitInfo, Long> sizeInBytes) throws IOException {
long totBeforeMergeBytes = 0; long totBeforeMergeBytes = 0;
long totAfterMergeBytes = 0; long totAfterMergeBytes = 0;
long totAfterMergeBytesFloored = 0; long totAfterMergeBytesFloored = 0;
@ -513,14 +514,14 @@ public class TieredMergePolicy extends MergePolicy {
} }
@Override @Override
public MergeSpecification findForcedMerges(SegmentInfos infos, int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer) throws IOException { public MergeSpecification findForcedMerges(SegmentInfos infos, int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, MergeContext mergeContext) throws IOException {
if (verbose(writer)) { if (verbose(mergeContext)) {
message("findForcedMerges maxSegmentCount=" + maxSegmentCount + " infos=" + writer.segString(infos) + " segmentsToMerge=" + segmentsToMerge, writer); message("findForcedMerges maxSegmentCount=" + maxSegmentCount + " infos=" + segString(mergeContext, infos) + " segmentsToMerge=" + segmentsToMerge, mergeContext);
} }
List<SegmentCommitInfo> eligible = new ArrayList<>(); List<SegmentCommitInfo> eligible = new ArrayList<>();
boolean forceMergeRunning = false; boolean forceMergeRunning = false;
-final Set<SegmentCommitInfo> merging = writer.getMergingSegments();
+final Set<SegmentCommitInfo> merging = mergeContext.getMergingSegments();
 boolean segmentIsOriginal = false;
 for(SegmentCommitInfo info : infos) {
 final Boolean isOriginal = segmentsToMerge.get(info);
@@ -541,21 +542,21 @@ public class TieredMergePolicy extends MergePolicy {
 // The size can change concurrently while we are running here, because deletes
 // are now applied concurrently, and this can piss off TimSort! So we
 // call size() once per segment and sort by that:
-Map<SegmentCommitInfo,Long> sizeInBytes = getSegmentSizes(writer, eligible);
+Map<SegmentCommitInfo,Long> sizeInBytes = getSegmentSizes(mergeContext, eligible);
 if ((maxSegmentCount > 1 && eligible.size() <= maxSegmentCount) ||
-(maxSegmentCount == 1 && eligible.size() == 1 && (!segmentIsOriginal || isMerged(infos, eligible.get(0), writer)))) {
-if (verbose(writer)) {
-message("already merged", writer);
+(maxSegmentCount == 1 && eligible.size() == 1 && (!segmentIsOriginal || isMerged(infos, eligible.get(0), mergeContext)))) {
+if (verbose(mergeContext)) {
+message("already merged", mergeContext);
 }
 return null;
 }
 eligible.sort(new SegmentByteSizeDescending(sizeInBytes));
-if (verbose(writer)) {
-message("eligible=" + eligible, writer);
-message("forceMergeRunning=" + forceMergeRunning, writer);
+if (verbose(mergeContext)) {
+message("eligible=" + eligible, mergeContext);
+message("forceMergeRunning=" + forceMergeRunning, mergeContext);
 }
 int end = eligible.size();
@@ -568,8 +569,8 @@ public class TieredMergePolicy extends MergePolicy {
 spec = new MergeSpecification();
 }
 final OneMerge merge = new OneMerge(eligible.subList(end-maxMergeAtOnceExplicit, end));
-if (verbose(writer)) {
-message("add merge=" + writer.segString(merge.segments), writer);
+if (verbose(mergeContext)) {
+message("add merge=" + segString(mergeContext, merge.segments), mergeContext);
 }
 spec.add(merge);
 end -= maxMergeAtOnceExplicit;
@@ -579,8 +580,8 @@ public class TieredMergePolicy extends MergePolicy {
 // Do final merge
 final int numToMerge = end - maxSegmentCount + 1;
 final OneMerge merge = new OneMerge(eligible.subList(end-numToMerge, end));
-if (verbose(writer)) {
-message("add final merge=" + merge.segString(), writer);
+if (verbose(mergeContext)) {
+message("add final merge=" + merge.segString(), mergeContext);
 }
 spec = new MergeSpecification();
 spec.add(merge);
@@ -590,14 +591,16 @@ public class TieredMergePolicy extends MergePolicy {
 }
 @Override
-public MergeSpecification findForcedDeletesMerges(SegmentInfos infos, IndexWriter writer) throws IOException {
-if (verbose(writer)) {
-message("findForcedDeletesMerges infos=" + writer.segString(infos) + " forceMergeDeletesPctAllowed=" + forceMergeDeletesPctAllowed, writer);
+public MergeSpecification findForcedDeletesMerges(SegmentInfos infos, MergeContext mergeContext) throws IOException {
+if (verbose(mergeContext)) {
+message("findForcedDeletesMerges infos=" + segString(mergeContext, infos) + " forceMergeDeletesPctAllowed=" + forceMergeDeletesPctAllowed, mergeContext);
 }
 final List<SegmentCommitInfo> eligible = new ArrayList<>();
-final Set<SegmentCommitInfo> merging = writer.getMergingSegments();
+final Set<SegmentCommitInfo> merging = mergeContext.getMergingSegments();
 for(SegmentCommitInfo info : infos) {
-double pctDeletes = 100.*((double) writer.numDeletesToMerge(info))/info.info.maxDoc();
+int delCount = mergeContext.numDeletesToMerge(info);
+assert assertDelCount(delCount, info);
+double pctDeletes = 100.*((double) delCount)/info.info.maxDoc();
 if (pctDeletes > forceMergeDeletesPctAllowed && !merging.contains(info)) {
 eligible.add(info);
 }
@@ -610,12 +613,12 @@ public class TieredMergePolicy extends MergePolicy {
 // The size can change concurrently while we are running here, because deletes
 // are now applied concurrently, and this can piss off TimSort! So we
 // call size() once per segment and sort by that:
-Map<SegmentCommitInfo,Long> sizeInBytes = getSegmentSizes(writer, infos.asList());
+Map<SegmentCommitInfo,Long> sizeInBytes = getSegmentSizes(mergeContext, infos.asList());
 eligible.sort(new SegmentByteSizeDescending(sizeInBytes));
-if (verbose(writer)) {
-message("eligible=" + eligible, writer);
+if (verbose(mergeContext)) {
+message("eligible=" + eligible, mergeContext);
 }
 int start = 0;
@@ -631,8 +634,8 @@ public class TieredMergePolicy extends MergePolicy {
 }
 final OneMerge merge = new OneMerge(eligible.subList(start, end));
-if (verbose(writer)) {
-message("add merge=" + writer.segString(merge.segments), writer);
+if (verbose(mergeContext)) {
+message("add merge=" + segString(mergeContext, merge.segments), mergeContext);
 }
 spec.add(merge);
 start = end;
@@ -645,14 +648,6 @@ public class TieredMergePolicy extends MergePolicy {
 return Math.max(floorSegmentBytes, bytes);
 }
-private boolean verbose(IndexWriter writer) {
-return writer != null && writer.infoStream.isEnabled("TMP");
-}
-private void message(String message, IndexWriter writer) {
-writer.infoStream.message("TMP", message);
-}
 @Override
 public String toString() {
 StringBuilder sb = new StringBuilder("[" + getClass().getSimpleName() + ": ");
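The call sites in this commit (getMergingSegments(), numDeletesToMerge(), getInfoStream() via the shared helpers, numDeletedDocs() in the test mock further below) imply roughly the following shape for the new interface. This is a reconstruction from usage, not the verbatim declaration: the tests refer to it as MergePolicy.MergeContext, so it is presumably nested in MergePolicy, and it may carry further members.

// Reconstructed from the call sites in this diff; javadoc omitted and
// any additional members unknown.
public interface MergeContext {
  // Deletes a merge of this segment would actually reclaim.
  int numDeletesToMerge(SegmentCommitInfo info) throws IOException;
  // Deleted documents currently recorded against this segment.
  int numDeletedDocs(SegmentCommitInfo info);
  // Logging sink, replacing direct access to IndexWriter.infoStream.
  InfoStream getInfoStream();
  // Segments already registered for a running merge.
  Set<SegmentCommitInfo> getMergingSegments();
}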

@@ -66,12 +66,12 @@ public class UpgradeIndexMergePolicy extends FilterMergePolicy {
 }
 @Override
-public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
-return in.findMerges(null, segmentInfos, writer);
+public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, MergeContext mergeContext) throws IOException {
+return in.findMerges(null, segmentInfos, mergeContext);
 }
 @Override
-public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer) throws IOException {
+public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, MergeContext mergeContext) throws IOException {
 // first find all old segments
 final Map<SegmentCommitInfo,Boolean> oldSegments = new HashMap<>();
 for (final SegmentCommitInfo si : segmentInfos) {
@@ -81,14 +81,14 @@ public class UpgradeIndexMergePolicy extends FilterMergePolicy {
 }
 }
-if (verbose(writer)) {
-message("findForcedMerges: segmentsToUpgrade=" + oldSegments, writer);
+if (verbose(mergeContext)) {
+message("findForcedMerges: segmentsToUpgrade=" + oldSegments, mergeContext);
 }
 if (oldSegments.isEmpty())
 return null;
-MergeSpecification spec = in.findForcedMerges(segmentInfos, maxSegmentCount, oldSegments, writer);
+MergeSpecification spec = in.findForcedMerges(segmentInfos, maxSegmentCount, oldSegments, mergeContext);
 if (spec != null) {
 // remove all segments that are in merge specification from oldSegments,
@@ -100,9 +100,9 @@ public class UpgradeIndexMergePolicy extends FilterMergePolicy {
 }
 if (!oldSegments.isEmpty()) {
-if (verbose(writer)) {
+if (verbose(mergeContext)) {
 message("findForcedMerges: " + in.getClass().getSimpleName() +
-" does not want to merge all old segments, merge remaining ones into new segment: " + oldSegments, writer);
+" does not want to merge all old segments, merge remaining ones into new segment: " + oldSegments, mergeContext);
 }
 final List<SegmentCommitInfo> newInfos = new ArrayList<>();
 for (final SegmentCommitInfo si : segmentInfos) {
@@ -120,11 +120,4 @@ public class UpgradeIndexMergePolicy extends FilterMergePolicy {
 return spec;
 }
-private boolean verbose(IndexWriter writer) {
-return writer != null && writer.infoStream.isEnabled("UPGMP");
-}
-private void message(String message, IndexWriter writer) {
-writer.infoStream.message("UPGMP", message);
-}
 }
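Both TieredMergePolicy and UpgradeIndexMergePolicy delete their private, writer-based verbose/message helpers above, yet the rewritten call sites still use verbose(mergeContext), message(..., mergeContext) and segString(mergeContext, segments). These are presumably consolidated into shared helpers on the MergePolicy base class; a minimal sketch, assuming only MergeContext.getInfoStream(). The names match the call sites; the bodies and the "MP" component tag are assumptions.

// Hypothetical base-class helpers; with a MergeContext always supplied,
// the old writer != null guard becomes unnecessary.
protected final boolean verbose(MergeContext mergeContext) {
  return mergeContext.getInfoStream().isEnabled("MP");
}

protected final void message(String message, MergeContext mergeContext) {
  mergeContext.getInfoStream().message("MP", message);
}

protected final String segString(MergeContext mergeContext, Iterable<SegmentCommitInfo> infos) {
  StringBuilder sb = new StringBuilder();
  for (SegmentCommitInfo info : infos) {
    if (sb.length() > 0) {
      sb.append(' ');
    }
    sb.append(info); // assumption: SegmentCommitInfo#toString yields the per-segment summary
  }
  return sb.toString();
}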

@@ -593,28 +593,28 @@ public class TestDemoParallelLeafReader extends LuceneTestCase {
 @Override
 public MergeSpecification findMerges(MergeTrigger mergeTrigger,
-SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
-return wrap(in.findMerges(mergeTrigger, segmentInfos, writer));
+SegmentInfos segmentInfos, MergeContext mergeContext) throws IOException {
+return wrap(in.findMerges(mergeTrigger, segmentInfos, mergeContext));
 }
 @Override
 public MergeSpecification findForcedMerges(SegmentInfos segmentInfos,
-int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer)
+int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, MergeContext mergeContext)
 throws IOException {
 // TODO: do we need to force-force this? Ie, wrapped MP may think index is already optimized, yet maybe its schemaGen is old? need test!
-return wrap(in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, writer));
+return wrap(in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, mergeContext));
 }
 @Override
-public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer)
+public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, MergeContext mergeContext)
 throws IOException {
-return wrap(in.findForcedDeletesMerges(segmentInfos, writer));
+return wrap(in.findForcedDeletesMerges(segmentInfos, mergeContext));
 }
 @Override
 public boolean useCompoundFile(SegmentInfos segments,
-SegmentCommitInfo newSegment, IndexWriter writer) throws IOException {
-return in.useCompoundFile(segments, newSegment, writer);
+SegmentCommitInfo newSegment, MergeContext mergeContext) throws IOException {
+return in.useCompoundFile(segments, newSegment, mergeContext);
 }
 @Override

@@ -23,9 +23,9 @@ import org.apache.lucene.util.LuceneTestCase;
 public class TestFilterMergePolicy extends LuceneTestCase {
-public void testMethodsOverridden() throws Exception {
+public void testMethodsOverridden() {
 for (Method m : MergePolicy.class.getDeclaredMethods()) {
-if (Modifier.isFinal(m.getModifiers())) continue;
+if (Modifier.isFinal(m.getModifiers()) || Modifier.isPrivate(m.getModifiers())) continue;
 try {
 FilterMergePolicy.class.getDeclaredMethod(m.getName(), m.getParameterTypes());
 } catch (NoSuchMethodException e) {

@@ -48,19 +48,19 @@ public class TestOneMergeWrappingMergePolicy extends LuceneTestCase {
 }
 @Override
-public MergePolicy.MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer)
+public MergePolicy.MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, MergeContext mergeContext)
 throws IOException {
 return merges;
 }
 @Override
 public MergePolicy.MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount,
-Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer) throws IOException {
+Map<SegmentCommitInfo,Boolean> segmentsToMerge, MergeContext mergeContext) throws IOException {
 return forcedMerges;
 }
 @Override
-public MergePolicy.MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer)
+public MergePolicy.MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, MergeContext mergeContext)
 throws IOException {
 return forcedDeletesMerges;
 }

@@ -253,7 +253,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
 }
 @Override
-public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer)
+public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, MergeContext mergeContext)
 throws IOException {
 MergeSpecification ms = new MergeSpecification();
 if (doMerge) {
@@ -267,19 +267,19 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
 @Override
 public MergeSpecification findForcedMerges(SegmentInfos segmentInfos,
-int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer)
+int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, MergeContext mergeContext)
 throws IOException {
 return null;
 }
 @Override
 public MergeSpecification findForcedDeletesMerges(
-SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
+SegmentInfos segmentInfos, MergeContext mergeContext) throws IOException {
 return null;
 }
 @Override
-public boolean useCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment, IndexWriter writer) {
+public boolean useCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment, MergeContext mergeContext) {
 return useCompoundFile;
 }
 }

@@ -53,7 +53,7 @@ public class AlcoholicMergePolicy extends LogMergePolicy {
 @Override
 //@BlackMagic(level=Voodoo);
-protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException {
+protected long size(SegmentCommitInfo info, MergeContext mergeContext) throws IOException {
 int hourOfDay = calendar.get(Calendar.HOUR_OF_DAY);
 if (hourOfDay < 6 ||
 hourOfDay > 20 ||
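AlcoholicMergePolicy only sees the parameter type of its size(...) hook change. The hook's pre-existing contract (the estimated size of the segment once merged, with deleted documents pro-rated out) presumably carries over; a sketch of what the default in MergePolicy could look like against MergeContext (an assumption from the call sites, not the committed code):

// Hypothetical default: pro-rate the on-disk size by the live-document ratio,
// asking the MergeContext instead of IndexWriter for the delete count.
protected long size(SegmentCommitInfo info, MergeContext mergeContext) throws IOException {
  long byteSize = info.sizeInBytes();
  int delCount = mergeContext.numDeletesToMerge(info);
  double delRatio = info.info.maxDoc() <= 0 ? 0.0 : (double) delCount / (double) info.info.maxDoc();
  return info.info.maxDoc() <= 0 ? byteSize : (long) (byteSize * (1.0 - delRatio));
}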

@@ -19,11 +19,19 @@ package org.apache.lucene.index;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.NullInfoStream;
+import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.TestUtil;
+import org.apache.lucene.util.Version;
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.ToIntFunction;
 /**
 * Base test case for {@link MergePolicy}.
@@ -80,5 +88,69 @@ public abstract class BaseMergePolicyTestCase extends LuceneTestCase {
 writer.close();
 }
 }
+public void testFindForcedDeletesMerges() throws IOException {
+MergePolicy mp = mergePolicy();
+SegmentInfos infos = new SegmentInfos(Version.LATEST.major);
+try (Directory directory = newDirectory()) {
+MergePolicy.MergeContext context = new MockMergeContext(s -> 0);
+int numSegs = random().nextInt(10);
+for (int i = 0; i < numSegs; i++) {
+SegmentInfo info = new SegmentInfo(
+directory, // dir
+Version.LATEST, // version
+Version.LATEST, // min version
+TestUtil.randomSimpleString(random()), // name
+random().nextInt(Integer.MAX_VALUE), // maxDoc
+random().nextBoolean(), // isCompoundFile
+null, // codec
+Collections.emptyMap(), // diagnostics
+TestUtil.randomSimpleString(// id
+random(),
+StringHelper.ID_LENGTH,
+StringHelper.ID_LENGTH).getBytes(StandardCharsets.US_ASCII),
+Collections.emptyMap(), // attributes
+null /* indexSort */);
+info.setFiles(Collections.emptyList());
+infos.add(new SegmentCommitInfo(info, random().nextInt(1), -1, -1, -1));
+}
+MergePolicy.MergeSpecification forcedDeletesMerges = mp.findForcedDeletesMerges(infos, context);
+if (forcedDeletesMerges != null) {
+assertEquals(0, forcedDeletesMerges.merges.size());
+}
+}
+}
+/**
+* Simple mock merge context for tests
+*/
+public static final class MockMergeContext implements MergePolicy.MergeContext {
+private final ToIntFunction<SegmentCommitInfo> numDeletesFunc;
+private final InfoStream infoStream = new NullInfoStream();
+public MockMergeContext(ToIntFunction<SegmentCommitInfo> numDeletesFunc) {
+this.numDeletesFunc = numDeletesFunc;
+}
+@Override
+public int numDeletesToMerge(SegmentCommitInfo info) {
+return numDeletesFunc.applyAsInt(info);
+}
+@Override
+public int numDeletedDocs(SegmentCommitInfo info) {
+return numDeletesToMerge(info);
+}
+@Override
+public InfoStream getInfoStream() {
+return infoStream;
+}
+@Override
+public Set<SegmentCommitInfo> getMergingSegments() {
+return Collections.emptySet();
+}
+}
 }
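Since MockMergeContext never touches an IndexWriter, concrete policies can now be exercised directly in a unit test. A sketch of the kind of companion test this enables, mirroring testFindForcedDeletesMerges above (the concrete policy, the test name and the zero-deletes function are illustrative, not part of this commit):

public void testNoForcedDeletesMergesWithoutDeletes() throws IOException {
  // With zero deletes reported by the context, a forced-deletes request
  // should yield no merges at all.
  SegmentInfos infos = new SegmentInfos(Version.LATEST.major); // empty index
  MergePolicy.MergeContext context = new MockMergeContext(info -> 0);
  MergePolicy.MergeSpecification spec = new TieredMergePolicy().findForcedDeletesMerges(infos, context);
  assertTrue(spec == null || spec.merges.isEmpty());
}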

@@ -17,10 +17,6 @@
 package org.apache.lucene.index;
 import java.io.IOException;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.MergePolicy;
-import org.apache.lucene.index.MergeTrigger;
-import org.apache.lucene.index.SegmentInfos;
 /**
 * A {@link MergePolicy} that only returns forced merges.
@@ -38,7 +34,7 @@ public final class ForceMergePolicy extends FilterMergePolicy {
 }
 @Override
-public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer)
+public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, MergeContext mergeContext)
 throws IOException {
 return null;
 }

@@ -49,14 +49,12 @@ public class MockRandomMergePolicy extends MergePolicy {
 }
 @Override
-public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer) {
+public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, MergeContext mergeContext) {
 MergeSpecification mergeSpec = null;
 //System.out.println("MRMP: findMerges sis=" + segmentInfos);
-int numSegments = segmentInfos.size();
 List<SegmentCommitInfo> segments = new ArrayList<>();
-final Set<SegmentCommitInfo> merging = writer.getMergingSegments();
+final Set<SegmentCommitInfo> merging = mergeContext.getMergingSegments();
 for(SegmentCommitInfo sipc : segmentInfos) {
 if (!merging.contains(sipc)) {
@@ -64,7 +62,7 @@ public class MockRandomMergePolicy extends MergePolicy {
 }
 }
-numSegments = segments.size();
+int numSegments = segments.size();
 if (numSegments > 1 && (numSegments > 30 || random.nextInt(5) == 3)) {
@@ -85,7 +83,7 @@ public class MockRandomMergePolicy extends MergePolicy {
 @Override
 public MergeSpecification findForcedMerges(
-SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, IndexWriter writer)
+SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentCommitInfo,Boolean> segmentsToMerge, MergeContext mergeContext)
 throws IOException {
 final List<SegmentCommitInfo> eligibleSegments = new ArrayList<>();
@@ -97,7 +95,7 @@ public class MockRandomMergePolicy extends MergePolicy {
 //System.out.println("MRMP: findMerges sis=" + segmentInfos + " eligible=" + eligibleSegments);
 MergeSpecification mergeSpec = null;
-if (eligibleSegments.size() > 1 || (eligibleSegments.size() == 1 && isMerged(segmentInfos, eligibleSegments.get(0), writer) == false)) {
+if (eligibleSegments.size() > 1 || (eligibleSegments.size() == 1 && isMerged(segmentInfos, eligibleSegments.get(0), mergeContext) == false)) {
 mergeSpec = new MergeSpecification();
 // Already shuffled having come out of a set but
 // shuffle again for good measure:
@@ -126,12 +124,12 @@ public class MockRandomMergePolicy extends MergePolicy {
 }
 @Override
-public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer) throws IOException {
-return findMerges(null, segmentInfos, writer);
+public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, MergeContext mergeContext) throws IOException {
+return findMerges(null, segmentInfos, mergeContext);
 }
 @Override
-public boolean useCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo, IndexWriter writer) throws IOException {
+public boolean useCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo, MergeContext mergeContext) throws IOException {
 // 80% of the time we create CFS:
 return random.nextInt(5) != 1;
 }
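For downstream MergePolicy subclasses the migration is the same mechanical step seen throughout this commit: the trailing IndexWriter parameter becomes a MergeContext, and writer-only calls move to their MergeContext counterparts. A minimal hypothetical subclass after the cut-over (class name, "LOGMP" tag and behavior are illustrative, not part of this commit):

import java.io.IOException;

import org.apache.lucene.index.FilterMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeTrigger;
import org.apache.lucene.index.SegmentInfos;

// Delegates every decision, but logs natural-merge requests through the
// InfoStream the MergeContext exposes; no IndexWriter required.
public class LoggingMergePolicy extends FilterMergePolicy {

  public LoggingMergePolicy(MergePolicy in) {
    super(in);
  }

  @Override
  public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos,
      MergeContext mergeContext) throws IOException {
    if (mergeContext.getInfoStream().isEnabled("LOGMP")) {
      mergeContext.getInfoStream().message("LOGMP", "findMerges trigger=" + mergeTrigger);
    }
    return in.findMerges(mergeTrigger, segmentInfos, mergeContext);
  }
}

Because its only dependency is the MergeContext, such a policy is directly testable with the MockMergeContext introduced in BaseMergePolicyTestCase above.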