avoid pathological merging

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1630068 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-10-08 10:45:45 +00:00
parent 808ae52722
commit f11cf0c2f1
1 changed files with 94 additions and 4 deletions

View File

@ -61,10 +61,51 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.AlcoholicMergePolicy;
import org.apache.lucene.index.AssertingDirectoryReader;
import org.apache.lucene.index.AssertingLeafReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CompositeReader;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldFilterLeafReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader.ReaderClosedListener;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.LiveIndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.MockRandomMergePolicy;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.ParallelCompositeReader;
import org.apache.lucene.index.ParallelLeafReader;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.SimpleMergedSegmentWarmer;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.AssertingIndexSearcher;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
@ -95,7 +136,6 @@ import org.junit.Test;
import org.junit.rules.RuleChain;
import org.junit.rules.TestRule;
import org.junit.runner.RunWith;
import com.carrotsearch.randomizedtesting.JUnit4MethodProvider;
import com.carrotsearch.randomizedtesting.LifecycleScope;
import com.carrotsearch.randomizedtesting.MixWithSuiteName;
@ -707,8 +747,9 @@ public abstract class LuceneTestCase extends Assert {
*/
public static SegmentReader getOnlySegmentReader(DirectoryReader reader) {
List<LeafReaderContext> subReaders = reader.leaves();
if (subReaders.size() != 1)
if (subReaders.size() != 1) {
throw new IllegalArgumentException(reader + " has " + subReaders.size() + " segments instead of exactly one");
}
final LeafReader r = subReaders.get(0).reader();
assertTrue(r instanceof SegmentReader);
return (SegmentReader) r;
@ -862,6 +903,8 @@ public abstract class LuceneTestCase extends Assert {
c.setMergePolicy(newMergePolicy(r));
avoidPathologicalMerging(c);
if (rarely(r)) {
c.setMergedSegmentWarmer(new SimpleMergedSegmentWarmer(c.getInfoStream()));
}
@ -870,6 +913,53 @@ public abstract class LuceneTestCase extends Assert {
return c;
}
private static void avoidPathologicalMerging(IndexWriterConfig iwc) {
// Don't allow "tiny" flushed segments with "big" merge
// floor: this leads to pathological O(N^2) merge costs:
long estFlushSizeBytes = Long.MAX_VALUE;
if (iwc.getMaxBufferedDocs() != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
// Gross estimation of 1 KB segment bytes for each doc indexed:
estFlushSizeBytes = Math.min(estFlushSizeBytes, iwc.getMaxBufferedDocs() * 1024);
}
if (iwc.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
estFlushSizeBytes = Math.min(estFlushSizeBytes, (long) (iwc.getRAMBufferSizeMB() * 1024 * 1024));
}
assert estFlushSizeBytes > 0;
MergePolicy mp = iwc.getMergePolicy();
if (mp instanceof TieredMergePolicy) {
TieredMergePolicy tmp = (TieredMergePolicy) mp;
long floorSegBytes = (long) (tmp.getFloorSegmentMB() * 1024 * 1024);
if (floorSegBytes / estFlushSizeBytes > 10) {
double newValue = estFlushSizeBytes * 10.0 / 1024 / 1024;
if (VERBOSE) {
System.out.println("NOTE: LuceneTestCase: changing TieredMergePolicy.floorSegmentMB from " + tmp.getFloorSegmentMB() + " to " + newValue + " to avoid pathological merging");
}
tmp.setFloorSegmentMB(newValue);
}
} else if (mp instanceof LogByteSizeMergePolicy) {
LogByteSizeMergePolicy lmp = (LogByteSizeMergePolicy) mp;
if ((lmp.getMinMergeMB()*1024*1024) / estFlushSizeBytes > 10) {
double newValue = estFlushSizeBytes * 10.0 / 1024 / 1024;
if (VERBOSE) {
System.out.println("NOTE: LuceneTestCase: changing LogByteSizeMergePolicy.minMergeMB from " + lmp.getMinMergeMB() + " to " + newValue + " to avoid pathological merging");
}
lmp.setMinMergeMB(newValue);
}
} else if (mp instanceof LogDocMergePolicy) {
LogDocMergePolicy lmp = (LogDocMergePolicy) mp;
assert estFlushSizeBytes / 1024 < Integer.MAX_VALUE/10;
int estFlushDocs = Math.max(1, (int) (estFlushSizeBytes / 1024));
if (lmp.getMinMergeDocs() / estFlushDocs > 10) {
int newValue = estFlushDocs * 10;
if (VERBOSE) {
System.out.println("NOTE: LuceneTestCase: changing LogDocMergePolicy.minMergeDocs from " + lmp.getMinMergeDocs() + " to " + newValue + " to avoid pathological merging");
}
lmp.setMinMergeDocs(newValue);
}
}
}
public static MergePolicy newMergePolicy(Random r) {
if (rarely(r)) {
return new MockRandomMergePolicy(r);