LUCENE-3197: don't over-merge if deletes are still arriving while optimize runs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1137211 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2011-06-18 16:06:44 +00:00
parent 351562bfab
commit 2e48814dff
10 changed files with 64 additions and 40 deletions

CHANGES.txt

@@ -527,6 +527,10 @@ Bug fixes
   in the superclass Searcher were not using it, leading to strange bugs.
   (Uwe Schindler, Robert Muir)
 
+* LUCENE-3197: Fix core merge policies to not over-merge during
+  background optimize when documents are still being deleted
+  concurrently with the optimize (Mike McCandless)
+
 API Changes
 
 * LUCENE-3208: Renamed protected IndexSearcher.createWeight() to expert
@@ -536,6 +540,11 @@ API Changes
   IndexSearcher. Both deprecated methods will be removed in Lucene 4.0.
   (Uwe Schindler, Robert Muir, Yonik Seeley)
 
+* LUCENE-3197: MergePolicy.findMergesForOptimize now takes
+  Map<SegmentInfo,Boolean> instead of Set<SegmentInfo> as the second
+  argument, so the merge policy knows which segments were originally
+  present vs produced by an optimizing merge (Mike McCandless)
+
 New Features
 
 * LUCENE-3140: Added experimental FST implementation to Lucene.

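The CHANGES entries above describe the new API in words; the sketch below is a minimal, self-contained illustration of the Map<SegmentInfo,Boolean> bookkeeping. It does not use the real Lucene classes: a plain String stands in for SegmentInfo, purely as an assumption for illustration. Segments present when optimize() was requested are recorded with Boolean.TRUE, segments produced later by cascaded optimize merges with Boolean.FALSE, and a policy tells tracked from untracked segments with a null check.

import java.util.HashMap;
import java.util.Map;

// Illustrative only: String stands in for SegmentInfo.
public class SegmentsToOptimizeSketch {
  public static void main(String[] args) {
    Map<String,Boolean> segmentsToOptimize = new HashMap<String,Boolean>();

    // Segments that existed when optimize() was requested: value TRUE.
    segmentsToOptimize.put("_0", Boolean.TRUE);
    segmentsToOptimize.put("_1", Boolean.TRUE);

    // A segment produced later by a cascaded optimize merge: value FALSE.
    segmentsToOptimize.put("_2", Boolean.FALSE);

    // A merge policy can now tell the two cases apart:
    for (String seg : new String[] {"_0", "_2", "_9"}) {
      Boolean isOriginal = segmentsToOptimize.get(seg);
      if (isOriginal == null) {
        System.out.println(seg + ": not part of this optimize");
      } else if (isOriginal) {
        System.out.println(seg + ": original segment, may still need merging");
      } else {
        System.out.println(seg + ": produced by a cascaded merge");
      }
    }
  }
}

The diffs below apply exactly this pattern inside IndexWriter and the core merge policies.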
BalancedSegmentMergePolicy.java

@@ -20,7 +20,7 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.util.Collections;
-import java.util.Set;
+import java.util.Map;
 /**
  * Merge policy that tries to balance not doing large
@@ -105,7 +105,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
   }
   @Override
-  public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
+  public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxNumSegments, Map<SegmentInfo,Boolean> segmentsToOptimize) throws IOException {
     assert maxNumSegments > 0;
@@ -120,8 +120,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
     while(last > 0) {
       final SegmentInfo info = infos.info(--last);
-      if (segmentsToOptimize.contains(info)) {
+      if (segmentsToOptimize.containsKey(info)) {
        last++;
        break;
      }

IndexWriter.java

@@ -236,7 +236,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   private DocumentsWriter docWriter;
   final IndexFileDeleter deleter;
-  private Set<SegmentInfo> segmentsToOptimize = new HashSet<SegmentInfo>(); // used by optimize to note those needing optimization
+  private Map<SegmentInfo,Boolean> segmentsToOptimize = new HashMap<SegmentInfo,Boolean>(); // used by optimize to note those needing optimization
   private int optimizeMaxNumSegments;
   private Lock writeLock;
@@ -1664,7 +1664,9 @@
     synchronized(this) {
       resetMergeExceptions();
       segmentsToOptimize.clear();
-      segmentsToOptimize.addAll(segmentInfos.asSet());
+      for(SegmentInfo info : segmentInfos) {
+        segmentsToOptimize.put(info, Boolean.TRUE);
+      }
       optimizeMaxNumSegments = maxNumSegments;
       // Now mark all pending & running merges as optimize
@@ -1888,7 +1890,7 @@
     final MergePolicy.MergeSpecification spec;
     if (optimize) {
-      spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableSet(segmentsToOptimize));
+      spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableMap(segmentsToOptimize));
       if (spec != null) {
         final int numMerges = spec.merges.size();
@@ -3042,7 +3044,7 @@
       if (merge.optimize) {
         // cascade the optimize:
-        segmentsToOptimize.add(merge.info);
+        segmentsToOptimize.put(merge.info, Boolean.FALSE);
       }
       return true;
@@ -3086,7 +3088,7 @@
    *
    * @lucene.experimental
    */
-  public final void merge(MergePolicy.OneMerge merge)
+  public void merge(MergePolicy.OneMerge merge)
     throws CorruptIndexException, IOException {
     boolean success = false;
@@ -3167,7 +3169,7 @@
       if (info.dir != directory) {
         isExternal = true;
       }
-      if (segmentsToOptimize.contains(info)) {
+      if (segmentsToOptimize.containsKey(info)) {
         merge.optimize = true;
         merge.maxNumSegmentsOptimize = optimizeMaxNumSegments;
       }

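One detail in the IndexWriter hunks above is that the policy is handed Collections.unmodifiableMap(segmentsToOptimize) rather than the map itself, so a merge policy can read the writer's bookkeeping but not mutate it. The sketch below (again with String standing in for SegmentInfo, an assumption for illustration only) shows what that read-only view does.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

// Illustrative only: String stands in for SegmentInfo.
public class UnmodifiableViewSketch {
  public static void main(String[] args) {
    Map<String,Boolean> segmentsToOptimize = new HashMap<String,Boolean>();
    segmentsToOptimize.put("_0", Boolean.TRUE);

    // A read-only view, like the one the writer passes to findMergesForOptimize.
    Map<String,Boolean> readOnly = Collections.unmodifiableMap(segmentsToOptimize);

    try {
      readOnly.put("_1", Boolean.FALSE);  // a policy must not edit the writer's state
    } catch (UnsupportedOperationException expected) {
      System.out.println("read-only view rejected the write");
    }

    // The writer still updates the backing map, e.g. when cascading an optimize
    // merge, and the view reflects that update.
    segmentsToOptimize.put("_1", Boolean.FALSE);
    System.out.println(readOnly);
  }
}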
LogMergePolicy.java

@@ -21,7 +21,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
-import java.util.Set;
+import java.util.Map;
 /** <p>This class implements a {@link MergePolicy} that tries
  * to merge segments into levels of exponentially
@@ -201,20 +201,23 @@ public abstract class LogMergePolicy extends MergePolicy {
     }
   }
-  protected boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
+  protected boolean isOptimized(SegmentInfos infos, int maxNumSegments, Map<SegmentInfo,Boolean> segmentsToOptimize) throws IOException {
     final int numSegments = infos.size();
     int numToOptimize = 0;
     SegmentInfo optimizeInfo = null;
+    boolean segmentIsOriginal = false;
     for(int i=0;i<numSegments && numToOptimize <= maxNumSegments;i++) {
       final SegmentInfo info = infos.info(i);
-      if (segmentsToOptimize.contains(info)) {
+      final Boolean isOriginal = segmentsToOptimize.get(info);
+      if (isOriginal != null) {
+        segmentIsOriginal = isOriginal;
        numToOptimize++;
        optimizeInfo = info;
      }
     }
     return numToOptimize <= maxNumSegments &&
-      (numToOptimize != 1 || isOptimized(optimizeInfo));
+      (numToOptimize != 1 || !segmentIsOriginal || isOptimized(optimizeInfo));
   }
 /** Returns true if this single info is optimized (has no
@@ -346,7 +349,7 @@ public abstract class LogMergePolicy extends MergePolicy {
    * in use may make use of concurrency. */
   @Override
   public MergeSpecification findMergesForOptimize(SegmentInfos infos,
-            int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
+            int maxNumSegments, Map<SegmentInfo,Boolean> segmentsToOptimize) throws IOException {
     assert maxNumSegments > 0;
     if (verbose()) {
@@ -368,7 +371,7 @@
     int last = infos.size();
     while (last > 0) {
       final SegmentInfo info = infos.info(--last);
-      if (segmentsToOptimize.contains(info)) {
+      if (segmentsToOptimize.get(info) != null) {
        last++;
        break;
      }

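The heart of the over-merge fix is the new termination test in isOptimized above: if exactly one candidate segment remains and it was produced by the optimize itself (its map value is FALSE), the optimize is treated as done even though isOptimized(SegmentInfo) might still return false because deletes arrived while the merge ran; previously that would trigger yet another single-segment merge. The stand-alone sketch below mirrors the shape of that predicate; the names and the single lastHasNoDeletes flag (a simplification of everything isOptimized(SegmentInfo) really checks) are assumptions for illustration only.

// Illustrative sketch of the patched termination test; not the actual LogMergePolicy API.
public class OptimizeDoneSketch {

  static boolean optimizeDone(int numToOptimize, int maxNumSegments,
                              boolean lastIsOriginal, boolean lastHasNoDeletes) {
    // Same shape as the patched return statement: a lone segment that the optimize
    // itself produced counts as done, even if deletes landed on it in the meantime.
    return numToOptimize <= maxNumSegments
        && (numToOptimize != 1 || !lastIsOriginal || lastHasNoDeletes);
  }

  public static void main(String[] args) {
    // A single original segment that still carries deletes: keep merging.
    System.out.println(optimizeDone(1, 1, true, false));   // false
    // A single segment produced by the optimize, deletes arrived meanwhile: stop.
    System.out.println(optimizeDone(1, 1, false, false));  // true
  }
}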
MergePolicy.java

@@ -24,7 +24,7 @@ import org.apache.lucene.util.SetOnce.AlreadySetException;
 import java.io.IOException;
 import java.util.List;
 import java.util.ArrayList;
-import java.util.Set;
+import java.util.Map;
 /**
  * <p>Expert: a MergePolicy determines the sequence of
@@ -297,10 +297,15 @@ public abstract class MergePolicy implements java.io.Closeable {
    *          is always 1)
    * @param segmentsToOptimize
    *          contains the specific SegmentInfo instances that must be merged
-   *          away. This may be a subset of all SegmentInfos.
+   *          away. This may be a subset of all
+   *          SegmentInfos.  If the value is True for a
+   *          given SegmentInfo, that means this segment was
+   *          an original segment present in the
+   *          to-be-optimized index; else, it was a segment
+   *          produced by a cascaded merge.
    */
   public abstract MergeSpecification findMergesForOptimize(
-      SegmentInfos segmentInfos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize)
+      SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentInfo,Boolean> segmentsToOptimize)
       throws CorruptIndexException, IOException;
 /**

NoMergePolicy.java

@@ -18,7 +18,7 @@ package org.apache.lucene.index;
  */
 import java.io.IOException;
-import java.util.Set;
+import java.util.Map;
 /**
  * A {@link MergePolicy} which never returns merges to execute (hence it's
@@ -59,7 +59,7 @@ public final class NoMergePolicy extends MergePolicy {
   @Override
   public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos,
-             int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize)
+             int maxSegmentCount, Map<SegmentInfo,Boolean> segmentsToOptimize)
              throws CorruptIndexException, IOException { return null; }
   @Override

TieredMergePolicy.java

@@ -18,7 +18,7 @@ package org.apache.lucene.index;
  */
 import java.io.IOException;
-import java.util.Set;
+import java.util.Map;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
@@ -472,7 +472,7 @@ public class TieredMergePolicy extends MergePolicy {
   }
   @Override
-  public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize) throws IOException {
+  public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxSegmentCount, Map<SegmentInfo,Boolean> segmentsToOptimize) throws IOException {
     if (verbose()) {
       message("findMergesForOptimize maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToOptimize=" + segmentsToOptimize);
     }
@@ -480,8 +480,11 @@
     List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
     boolean optimizeMergeRunning = false;
     final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
+    boolean segmentIsOriginal = false;
     for(SegmentInfo info : infos) {
-      if (segmentsToOptimize.contains(info)) {
+      final Boolean isOriginal = segmentsToOptimize.get(info);
+      if (isOriginal != null) {
+        segmentIsOriginal = isOriginal;
         if (!merging.contains(info)) {
           eligible.add(info);
         } else {
@@ -495,7 +498,7 @@
     }
     if ((maxSegmentCount > 1 && eligible.size() <= maxSegmentCount) ||
-        (maxSegmentCount == 1 && eligible.size() == 1 && isOptimized(eligible.get(0)))) {
+        (maxSegmentCount == 1 && eligible.size() == 1 && (!segmentIsOriginal || isOptimized(eligible.get(0))))) {
       if (verbose()) {
         message("already optimized");
       }

UpgradeIndexMergePolicy.java

@@ -21,9 +21,9 @@ import org.apache.lucene.util.Constants;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
+import java.util.Map;
+import java.util.HashMap;
 /** This {@link MergePolicy} is used for upgrading all existing segments of
  * an index when calling {@link IndexWriter#optimize()}.
@@ -79,12 +79,13 @@ public class UpgradeIndexMergePolicy extends MergePolicy {
   }
   @Override
-  public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize) throws CorruptIndexException, IOException {
+  public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentInfo,Boolean> segmentsToOptimize) throws CorruptIndexException, IOException {
     // first find all old segments
-    final HashSet<SegmentInfo> oldSegments = new HashSet<SegmentInfo>();
+    final Map<SegmentInfo,Boolean> oldSegments = new HashMap<SegmentInfo,Boolean>();
     for (final SegmentInfo si : segmentInfos) {
-      if (segmentsToOptimize.contains(si) && shouldUpgradeSegment(si)) {
-        oldSegments.add(si);
+      final Boolean v = segmentsToOptimize.get(si);
+      if (v != null && shouldUpgradeSegment(si)) {
+        oldSegments.put(si, v);
       }
     }
@@ -93,14 +94,16 @@
     if (oldSegments.isEmpty())
       return null;
     MergeSpecification spec = base.findMergesForOptimize(segmentInfos, maxSegmentCount, oldSegments);
     if (spec != null) {
       // remove all segments that are in merge specification from oldSegments,
       // the resulting set contains all segments that are left over
       // and will be merged to one additional segment:
       for (final OneMerge om : spec.merges) {
-        oldSegments.removeAll(om.segments);
+        for(SegmentInfo info : om.segments) {
+          oldSegments.remove(info);
+        }
       }
     }
@@ -110,7 +113,7 @@
         " does not want to merge all old segments, merge remaining ones into new segment: " + oldSegments);
       final List<SegmentInfo> newInfos = new ArrayList<SegmentInfo>();
       for (final SegmentInfo si : segmentInfos) {
-        if (oldSegments.contains(si)) {
+        if (oldSegments.containsKey(si)) {
          newInfos.add(si);
        }
      }

MockRandomMergePolicy.java

@@ -22,7 +22,7 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Random;
-import java.util.Set;
+import java.util.Map;
 import org.apache.lucene.util._TestUtil;
@@ -56,12 +56,12 @@ public class MockRandomMergePolicy extends MergePolicy {
   @Override
   public MergeSpecification findMergesForOptimize(
-      SegmentInfos segmentInfos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize)
+      SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentInfo,Boolean> segmentsToOptimize)
       throws CorruptIndexException, IOException {
     final List<SegmentInfo> eligibleSegments = new ArrayList<SegmentInfo>();
     for(SegmentInfo info : segmentInfos) {
-      if (segmentsToOptimize.contains(info)) {
+      if (segmentsToOptimize.containsKey(info)) {
        eligibleSegments.add(info);
      }
     }
@@ -85,7 +85,7 @@
     if (mergeSpec != null) {
       for(OneMerge merge : mergeSpec.merges) {
         for(SegmentInfo info : merge.segments) {
-          assert segmentsToOptimize.contains(info);
+          assert segmentsToOptimize.containsKey(info);
         }
       }
     }

TestPerSegmentDeletes.java

@@ -21,7 +21,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Random;
-import java.util.Set;
+import java.util.Map;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.index.TermsEnum.SeekStatus;
@@ -271,7 +271,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
     @Override
     public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos,
-        int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize)
+        int maxSegmentCount, Map<SegmentInfo,Boolean> segmentsToOptimize)
         throws CorruptIndexException, IOException {
       return null;
     }