Assert that TieredMergePolicy (TMP) always picks a non-empty candidate; clean up code a bit; consistently use super.size(info, writer) rather than info.sizeInBytes()

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1630049 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-10-08 09:39:24 +00:00
parent 511996c5ba
commit 808ae52722
2 changed files with 25 additions and 12 deletions

View File

@ -117,8 +117,9 @@ public abstract class MergePolicy {
* @param segments List of {@link SegmentCommitInfo}s * @param segments List of {@link SegmentCommitInfo}s
* to be merged. */ * to be merged. */
public OneMerge(List<SegmentCommitInfo> segments) { public OneMerge(List<SegmentCommitInfo> segments) {
if (0 == segments.size()) if (0 == segments.size()) {
throw new RuntimeException("segments must include at least one segment"); throw new RuntimeException("segments must include at least one segment");
}
// clone the list, as the in list may be based off original SegmentInfos and may be modified // clone the list, as the in list may be based off original SegmentInfos and may be modified
this.segments = new ArrayList<>(segments); this.segments = new ArrayList<>(segments);
int count = 0; int count = 0;
@ -239,14 +240,17 @@ public abstract class MergePolicy {
StringBuilder b = new StringBuilder(); StringBuilder b = new StringBuilder();
final int numSegments = segments.size(); final int numSegments = segments.size();
for(int i=0;i<numSegments;i++) { for(int i=0;i<numSegments;i++) {
if (i > 0) b.append(' '); if (i > 0) {
b.append(' ');
}
b.append(segments.get(i).toString(dir, 0)); b.append(segments.get(i).toString(dir, 0));
} }
if (info != null) { if (info != null) {
b.append(" into ").append(info.info.name); b.append(" into ").append(info.info.name);
} }
if (maxNumSegments != -1) if (maxNumSegments != -1) {
b.append(" [maxNumSegments=" + maxNumSegments + "]"); b.append(" [maxNumSegments=" + maxNumSegments + "]");
}
if (aborted) { if (aborted) {
b.append(" [ABORTED]"); b.append(" [ABORTED]");
} }
@ -312,8 +316,9 @@ public abstract class MergePolicy {
StringBuilder b = new StringBuilder(); StringBuilder b = new StringBuilder();
b.append("MergeSpec:\n"); b.append("MergeSpec:\n");
final int count = merges.size(); final int count = merges.size();
for(int i=0;i<count;i++) for(int i=0;i<count;i++) {
b.append(" ").append(1 + i).append(": ").append(merges.get(i).segString(dir)); b.append(" ").append(1 + i).append(": ").append(merges.get(i).segString(dir));
}
return b.toString(); return b.toString();
} }
} }
@ -477,9 +482,9 @@ public abstract class MergePolicy {
protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException { protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException {
long byteSize = info.sizeInBytes(); long byteSize = info.sizeInBytes();
int delCount = writer.numDeletedDocs(info); int delCount = writer.numDeletedDocs(info);
double delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((float)delCount / (float)info.info.getDocCount())); double delRatio = info.info.getDocCount() <= 0 ? 0.0f : (float) delCount / (float) info.info.getDocCount();
assert delRatio <= 1.0; assert delRatio <= 1.0;
return (info.info.getDocCount() <= 0 ? byteSize : (long)(byteSize * (1.0 - delRatio))); return (info.info.getDocCount() <= 0 ? byteSize : (long) (byteSize * (1.0 - delRatio)));
} }
/** Returns true if this single info is already fully merged (has no /** Returns true if this single info is already fully merged (has no
@ -527,7 +532,7 @@ public abstract class MergePolicy {
throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")"); throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
} }
v *= 1024 * 1024; v *= 1024 * 1024;
this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v; this.maxCFSSegmentSize = v > Long.MAX_VALUE ? Long.MAX_VALUE : (long) v;
} }
} }

View File

@ -142,7 +142,7 @@ public class TieredMergePolicy extends MergePolicy {
throw new IllegalArgumentException("maxMergedSegmentMB must be >=0 (got " + v + ")"); throw new IllegalArgumentException("maxMergedSegmentMB must be >=0 (got " + v + ")");
} }
v *= 1024 * 1024; v *= 1024 * 1024;
maxMergedSegmentBytes = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v; maxMergedSegmentBytes = v > Long.MAX_VALUE ? Long.MAX_VALUE : (long) v;
return this; return this;
} }
@ -183,7 +183,7 @@ public class TieredMergePolicy extends MergePolicy {
throw new IllegalArgumentException("floorSegmentMB must be >= 0.0 (got " + v + ")"); throw new IllegalArgumentException("floorSegmentMB must be >= 0.0 (got " + v + ")");
} }
v *= 1024 * 1024; v *= 1024 * 1024;
floorSegmentBytes = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v; floorSegmentBytes = v > Long.MAX_VALUE ? Long.MAX_VALUE : (long) v;
return this; return this;
} }
@ -314,8 +314,12 @@ public class TieredMergePolicy extends MergePolicy {
// If we have too-large segments, grace them out // If we have too-large segments, grace them out
// of the maxSegmentCount: // of the maxSegmentCount:
int tooBigCount = 0; int tooBigCount = 0;
while (tooBigCount < infosSorted.size() && size(infosSorted.get(tooBigCount), writer) >= maxMergedSegmentBytes/2.0) { while (tooBigCount < infosSorted.size()) {
totIndexBytes -= size(infosSorted.get(tooBigCount), writer); long segBytes = size(infosSorted.get(tooBigCount), writer);
if (segBytes < maxMergedSegmentBytes/2.0) {
break;
}
totIndexBytes -= segBytes;
tooBigCount++; tooBigCount++;
} }
@ -351,7 +355,7 @@ public class TieredMergePolicy extends MergePolicy {
for(int idx = tooBigCount; idx<infosSorted.size(); idx++) { for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
final SegmentCommitInfo info = infosSorted.get(idx); final SegmentCommitInfo info = infosSorted.get(idx);
if (merging.contains(info)) { if (merging.contains(info)) {
mergingBytes += info.sizeInBytes(); mergingBytes += size(info, writer);
} else if (!toBeMerged.contains(info)) { } else if (!toBeMerged.contains(info)) {
eligible.add(info); eligible.add(info);
} }
@ -400,6 +404,10 @@ public class TieredMergePolicy extends MergePolicy {
totAfterMergeBytes += segBytes; totAfterMergeBytes += segBytes;
} }
// We should never see an empty candidate: we iterated over maxMergeAtOnce
// segments, and already pre-excluded the too-large segments:
assert candidate.size() > 0;
final MergeScore score = score(candidate, hitTooLarge, mergingBytes, writer); final MergeScore score = score(candidate, hitTooLarge, mergingBytes, writer);
if (verbose(writer)) { if (verbose(writer)) {
message(" maybe=" + writer.segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes/1024./1024.), writer); message(" maybe=" + writer.segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes/1024./1024.), writer);