mirror of https://github.com/apache/lucene.git
LUCENE-3197: don't over-merge if deletes are still arriving while optimize runs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1137211 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
351562bfab
commit
2e48814dff
|
@ -527,6 +527,10 @@ Bug fixes
|
|||
in the superclass Searcher were not using it, leading to strange bugs.
|
||||
(Uwe Schindler, Robert Muir)
|
||||
|
||||
* LUCENE-3197: Fix core merge policies to not over-merge during
|
||||
background optimize when documents are still being deleted
|
||||
concurrently with the optimize (Mike McCandless)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-3208: Renamed protected IndexSearcher.createWeight() to expert
|
||||
|
@ -536,6 +540,11 @@ API Changes
|
|||
IndexSearcher. Both deprecated methods will be removed in Lucene 4.0.
|
||||
(Uwe Schindler, Robert Muir, Yonik Seeley)
|
||||
|
||||
* LUCENE-3197: MergePolicy.findMergesForOptimize now takes
|
||||
Map<SegmentInfo,Boolean> instead of Set<SegmentInfo> as the second
|
||||
argument, so the merge policy knows which segments were originally
|
||||
present vs produced by an optimizing merge (Mike McCandless)
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-3140: Added experimental FST implementation to Lucene.
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.index;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Merge policy that tries to balance not doing large
|
||||
|
@ -105,7 +105,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
|
|||
}
|
||||
|
||||
@Override
|
||||
public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
|
||||
public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxNumSegments, Map<SegmentInfo,Boolean> segmentsToOptimize) throws IOException {
|
||||
|
||||
assert maxNumSegments > 0;
|
||||
|
||||
|
@ -120,8 +120,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy {
|
|||
while(last > 0) {
|
||||
|
||||
final SegmentInfo info = infos.info(--last);
|
||||
if (segmentsToOptimize.contains(info)) {
|
||||
|
||||
if (segmentsToOptimize.containsKey(info)) {
|
||||
last++;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -236,7 +236,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
|||
private DocumentsWriter docWriter;
|
||||
final IndexFileDeleter deleter;
|
||||
|
||||
private Set<SegmentInfo> segmentsToOptimize = new HashSet<SegmentInfo>(); // used by optimize to note those needing optimization
|
||||
private Map<SegmentInfo,Boolean> segmentsToOptimize = new HashMap<SegmentInfo,Boolean>(); // used by optimize to note those needing optimization
|
||||
private int optimizeMaxNumSegments;
|
||||
|
||||
private Lock writeLock;
|
||||
|
@ -1664,7 +1664,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
|||
synchronized(this) {
|
||||
resetMergeExceptions();
|
||||
segmentsToOptimize.clear();
|
||||
segmentsToOptimize.addAll(segmentInfos.asSet());
|
||||
for(SegmentInfo info : segmentInfos) {
|
||||
segmentsToOptimize.put(info, Boolean.TRUE);
|
||||
}
|
||||
optimizeMaxNumSegments = maxNumSegments;
|
||||
|
||||
// Now mark all pending & running merges as optimize
|
||||
|
@ -1888,7 +1890,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
|||
|
||||
final MergePolicy.MergeSpecification spec;
|
||||
if (optimize) {
|
||||
spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableSet(segmentsToOptimize));
|
||||
spec = mergePolicy.findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, Collections.unmodifiableMap(segmentsToOptimize));
|
||||
|
||||
if (spec != null) {
|
||||
final int numMerges = spec.merges.size();
|
||||
|
@ -3042,7 +3044,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
|||
|
||||
if (merge.optimize) {
|
||||
// cascade the optimize:
|
||||
segmentsToOptimize.add(merge.info);
|
||||
segmentsToOptimize.put(merge.info, Boolean.FALSE);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -3086,7 +3088,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final void merge(MergePolicy.OneMerge merge)
|
||||
public void merge(MergePolicy.OneMerge merge)
|
||||
throws CorruptIndexException, IOException {
|
||||
|
||||
boolean success = false;
|
||||
|
@ -3167,7 +3169,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
|||
if (info.dir != directory) {
|
||||
isExternal = true;
|
||||
}
|
||||
if (segmentsToOptimize.contains(info)) {
|
||||
if (segmentsToOptimize.containsKey(info)) {
|
||||
merge.optimize = true;
|
||||
merge.maxNumSegmentsOptimize = optimizeMaxNumSegments;
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
|
||||
/** <p>This class implements a {@link MergePolicy} that tries
|
||||
* to merge segments into levels of exponentially
|
||||
|
@ -201,20 +201,23 @@ public abstract class LogMergePolicy extends MergePolicy {
|
|||
}
|
||||
}
|
||||
|
||||
protected boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
|
||||
protected boolean isOptimized(SegmentInfos infos, int maxNumSegments, Map<SegmentInfo,Boolean> segmentsToOptimize) throws IOException {
|
||||
final int numSegments = infos.size();
|
||||
int numToOptimize = 0;
|
||||
SegmentInfo optimizeInfo = null;
|
||||
boolean segmentIsOriginal = false;
|
||||
for(int i=0;i<numSegments && numToOptimize <= maxNumSegments;i++) {
|
||||
final SegmentInfo info = infos.info(i);
|
||||
if (segmentsToOptimize.contains(info)) {
|
||||
final Boolean isOriginal = segmentsToOptimize.get(info);
|
||||
if (isOriginal != null) {
|
||||
segmentIsOriginal = isOriginal;
|
||||
numToOptimize++;
|
||||
optimizeInfo = info;
|
||||
}
|
||||
}
|
||||
|
||||
return numToOptimize <= maxNumSegments &&
|
||||
(numToOptimize != 1 || isOptimized(optimizeInfo));
|
||||
(numToOptimize != 1 || !segmentIsOriginal || isOptimized(optimizeInfo));
|
||||
}
|
||||
|
||||
/** Returns true if this single info is optimized (has no
|
||||
|
@ -346,7 +349,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
|||
* in use may make use of concurrency. */
|
||||
@Override
|
||||
public MergeSpecification findMergesForOptimize(SegmentInfos infos,
|
||||
int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
|
||||
int maxNumSegments, Map<SegmentInfo,Boolean> segmentsToOptimize) throws IOException {
|
||||
|
||||
assert maxNumSegments > 0;
|
||||
if (verbose()) {
|
||||
|
@ -368,7 +371,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
|||
int last = infos.size();
|
||||
while (last > 0) {
|
||||
final SegmentInfo info = infos.info(--last);
|
||||
if (segmentsToOptimize.contains(info)) {
|
||||
if (segmentsToOptimize.get(info) != null) {
|
||||
last++;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ import org.apache.lucene.util.SetOnce.AlreadySetException;
|
|||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* <p>Expert: a MergePolicy determines the sequence of
|
||||
|
@ -297,10 +297,15 @@ public abstract class MergePolicy implements java.io.Closeable {
|
|||
* is always 1)
|
||||
* @param segmentsToOptimize
|
||||
* contains the specific SegmentInfo instances that must be merged
|
||||
* away. This may be a subset of all SegmentInfos.
|
||||
* away. This may be a subset of all
|
||||
* SegmentInfos. If the value is True for a
|
||||
* given SegmentInfo, that means this segment was
|
||||
* an original segment present in the
|
||||
* to-be-optimized index; else, it was a segment
|
||||
* produced by a cascaded merge.
|
||||
*/
|
||||
public abstract MergeSpecification findMergesForOptimize(
|
||||
SegmentInfos segmentInfos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize)
|
||||
SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentInfo,Boolean> segmentsToOptimize)
|
||||
throws CorruptIndexException, IOException;
|
||||
|
||||
/**
|
||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.index;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* A {@link MergePolicy} which never returns merges to execute (hence it's
|
||||
|
@ -59,7 +59,7 @@ public final class NoMergePolicy extends MergePolicy {
|
|||
|
||||
@Override
|
||||
public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos,
|
||||
int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize)
|
||||
int maxSegmentCount, Map<SegmentInfo,Boolean> segmentsToOptimize)
|
||||
throws CorruptIndexException, IOException { return null; }
|
||||
|
||||
@Override
|
||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.index;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
|
@ -472,7 +472,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
}
|
||||
|
||||
@Override
|
||||
public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize) throws IOException {
|
||||
public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxSegmentCount, Map<SegmentInfo,Boolean> segmentsToOptimize) throws IOException {
|
||||
if (verbose()) {
|
||||
message("findMergesForOptimize maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToOptimize=" + segmentsToOptimize);
|
||||
}
|
||||
|
@ -480,8 +480,11 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
|
||||
boolean optimizeMergeRunning = false;
|
||||
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
|
||||
boolean segmentIsOriginal = false;
|
||||
for(SegmentInfo info : infos) {
|
||||
if (segmentsToOptimize.contains(info)) {
|
||||
final Boolean isOriginal = segmentsToOptimize.get(info);
|
||||
if (isOriginal != null) {
|
||||
segmentIsOriginal = isOriginal;
|
||||
if (!merging.contains(info)) {
|
||||
eligible.add(info);
|
||||
} else {
|
||||
|
@ -495,7 +498,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
}
|
||||
|
||||
if ((maxSegmentCount > 1 && eligible.size() <= maxSegmentCount) ||
|
||||
(maxSegmentCount == 1 && eligible.size() == 1 && isOptimized(eligible.get(0)))) {
|
||||
(maxSegmentCount == 1 && eligible.size() == 1 && (!segmentIsOriginal || isOptimized(eligible.get(0))))) {
|
||||
if (verbose()) {
|
||||
message("already optimized");
|
||||
}
|
||||
|
|
|
@ -21,9 +21,9 @@ import org.apache.lucene.util.Constants;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
||||
/** This {@link MergePolicy} is used for upgrading all existing segments of
|
||||
* an index when calling {@link IndexWriter#optimize()}.
|
||||
|
@ -79,12 +79,13 @@ public class UpgradeIndexMergePolicy extends MergePolicy {
|
|||
}
|
||||
|
||||
@Override
|
||||
public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize) throws CorruptIndexException, IOException {
|
||||
public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentInfo,Boolean> segmentsToOptimize) throws CorruptIndexException, IOException {
|
||||
// first find all old segments
|
||||
final HashSet<SegmentInfo> oldSegments = new HashSet<SegmentInfo>();
|
||||
final Map<SegmentInfo,Boolean> oldSegments = new HashMap<SegmentInfo,Boolean>();
|
||||
for (final SegmentInfo si : segmentInfos) {
|
||||
if (segmentsToOptimize.contains(si) && shouldUpgradeSegment(si)) {
|
||||
oldSegments.add(si);
|
||||
final Boolean v =segmentsToOptimize.get(si);
|
||||
if (v != null && shouldUpgradeSegment(si)) {
|
||||
oldSegments.put(si, v);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -93,14 +94,16 @@ public class UpgradeIndexMergePolicy extends MergePolicy {
|
|||
if (oldSegments.isEmpty())
|
||||
return null;
|
||||
|
||||
MergeSpecification spec = base.findMergesForOptimize(segmentInfos, maxSegmentCount, oldSegments);
|
||||
MergeSpecification spec = base.findMergesForOptimize(segmentInfos, maxSegmentCount, oldSegments);
|
||||
|
||||
if (spec != null) {
|
||||
// remove all segments that are in merge specification from oldSegments,
|
||||
// the resulting set contains all segments that are left over
|
||||
// and will be merged to one additional segment:
|
||||
for (final OneMerge om : spec.merges) {
|
||||
oldSegments.removeAll(om.segments);
|
||||
for(SegmentInfo info : om.segments) {
|
||||
oldSegments.remove(info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -110,7 +113,7 @@ public class UpgradeIndexMergePolicy extends MergePolicy {
|
|||
" does not want to merge all old segments, merge remaining ones into new segment: " + oldSegments);
|
||||
final List<SegmentInfo> newInfos = new ArrayList<SegmentInfo>();
|
||||
for (final SegmentInfo si : segmentInfos) {
|
||||
if (oldSegments.contains(si)) {
|
||||
if (oldSegments.containsKey(si)) {
|
||||
newInfos.add(si);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@ import java.util.ArrayList;
|
|||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
|
@ -56,12 +56,12 @@ public class MockRandomMergePolicy extends MergePolicy {
|
|||
|
||||
@Override
|
||||
public MergeSpecification findMergesForOptimize(
|
||||
SegmentInfos segmentInfos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize)
|
||||
SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentInfo,Boolean> segmentsToOptimize)
|
||||
throws CorruptIndexException, IOException {
|
||||
|
||||
final List<SegmentInfo> eligibleSegments = new ArrayList<SegmentInfo>();
|
||||
for(SegmentInfo info : segmentInfos) {
|
||||
if (segmentsToOptimize.contains(info)) {
|
||||
if (segmentsToOptimize.containsKey(info)) {
|
||||
eligibleSegments.add(info);
|
||||
}
|
||||
}
|
||||
|
@ -85,7 +85,7 @@ public class MockRandomMergePolicy extends MergePolicy {
|
|||
if (mergeSpec != null) {
|
||||
for(OneMerge merge : mergeSpec.merges) {
|
||||
for(SegmentInfo info : merge.segments) {
|
||||
assert segmentsToOptimize.contains(info);
|
||||
assert segmentsToOptimize.containsKey(info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||
|
@ -271,7 +271,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
|
|||
|
||||
@Override
|
||||
public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos,
|
||||
int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize)
|
||||
int maxSegmentCount, Map<SegmentInfo,Boolean> segmentsToOptimize)
|
||||
throws CorruptIndexException, IOException {
|
||||
return null;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue