LUCENE-5038: Refactor CompoundFile settings in MergePolicy and IndexWriterConfig

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1492701 13f79535-47bb-0310-9956-ffa450edef68
Simon Willnauer 2013-06-13 15:11:14 +00:00
parent af2128c683
commit e83c3f7a8b
43 changed files with 279 additions and 352 deletions


@@ -99,6 +99,10 @@ Changes in backwards compatibility policy
sqrt(), but this caching is removed, as sqrt is implemented in hardware with modern
JVMs and it's faster not to cache. (Robert Muir)
* LUCENE-5038: MergePolicy now has a default implementation for useCompoundFile based
on segment size and noCFSRatio. The default implementation was pulled up from
TieredMergePolicy. (Simon Willnauer)
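A minimal sketch of the pulled-up API (hedged illustration; both setters now live on
MergePolicy, so any policy can be configured the same way):

    TieredMergePolicy mp = new TieredMergePolicy();
    mp.setNoCFSRatio(1.0);                               // always wrap merged segments in CFS
    mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY); // no size cap on CFS segments
    // mp.setNoCFSRatio(0.0) disables CFS for merged segments entirely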
Bug Fixes
* LUCENE-4997: Internal test framework's tests are sensitive to previous
@@ -238,6 +242,13 @@ Tests
JRE vendor via Runtime.halt().
(Mike McCandless, Robert Muir, Uwe Schindler, Rodrigo Trujillo, Dawid Weiss)
Changes in runtime behavior
* LUCENE-5038: New segments written by IndexWriter are now wrapped into CFS
by default. DocumentsWriterPerThread doesn't consult MergePolicy anymore
to decide if a CFS must be written; instead, IndexWriterConfig now has a
property to enable / disable CFS for newly created segments. (Simon Willnauer)
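A minimal sketch of the new property (assuming Version.LUCENE_43 and an existing
Analyzer instance, neither of which is part of this entry):

    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, analyzer);
    conf.setUseCompoundFile(false); // newly flushed segments are written as separate files
    // merges remain governed by the MergePolicy's noCFSRatio / maxCFSSegmentSizeMB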
======================= Lucene 4.3.1 =======================
Bug Fixes


@@ -151,13 +151,10 @@ public class CreateIndexTask extends PerfTask {
} catch (Exception e) {
throw new RuntimeException("unable to instantiate class '" + mergePolicy + "' as merge policy", e);
}
iwConf.getMergePolicy().setNoCFSRatio(isCompound ? 1.0 : 0.0);
if(iwConf.getMergePolicy() instanceof LogMergePolicy) {
LogMergePolicy logMergePolicy = (LogMergePolicy) iwConf.getMergePolicy();
logMergePolicy.setUseCompoundFile(isCompound);
logMergePolicy.setMergeFactor(config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR));
} else if(iwConf.getMergePolicy() instanceof TieredMergePolicy) {
TieredMergePolicy tieredMergePolicy = (TieredMergePolicy) iwConf.getMergePolicy();
tieredMergePolicy.setUseCompoundFile(isCompound);
}
}
final double ramBuffer = config.get("ram.flush.mb",OpenIndexTask.DEFAULT_RAM_FLUSH_MB);


@@ -49,6 +49,7 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SerialMergeScheduler;
@@ -754,7 +755,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
assertEquals(2, writer.getConfig().getMaxBufferedDocs());
assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, (int) writer.getConfig().getRAMBufferSizeMB());
assertEquals(3, ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor());
assertFalse(((LogMergePolicy) writer.getConfig().getMergePolicy()).getUseCompoundFile());
assertEquals(0.0d, writer.getConfig().getMergePolicy().getNoCFSRatio(), 0.0);
writer.close();
Directory dir = benchmark.getRunData().getDirectory();
IndexReader reader = DirectoryReader.open(dir);


@@ -29,15 +29,11 @@ import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment;
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
import org.apache.lucene.index.FieldInfos.FieldNumbers;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.MutableBits;
/**
* This class accepts multiple added documents and directly
@@ -114,6 +110,7 @@ final class DocumentsWriter {
List<String> newFiles;
final IndexWriter indexWriter;
final LiveIndexWriterConfig indexWriterConfig;
private AtomicInteger numDocsInRAM = new AtomicInteger(0);
@@ -144,6 +141,7 @@ final class DocumentsWriter {
this.indexWriter = writer;
this.infoStream = config.getInfoStream();
this.similarity = config.getSimilarity();
this.indexWriterConfig = writer.getConfig();
this.perThreadPool = config.getIndexerThreadPool();
this.chain = config.getIndexingChain();
this.perThreadPool.initialize(this, globalFieldNumbers, config);
@@ -517,7 +515,7 @@ final class DocumentsWriter {
// buffer, force them all to apply now. This is to
// prevent too-frequent flushing of a long tail of
// tiny segments:
final double ramBufferSizeMB = indexWriter.getConfig().getRAMBufferSizeMB();
final double ramBufferSizeMB = indexWriterConfig.getRAMBufferSizeMB();
if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
flushControl.getDeleteBytesUsed() > (1024*1024*ramBufferSizeMB/2)) {
if (infoStream.isEnabled("DW")) {


@@ -194,6 +194,7 @@ class DocumentsWriterPerThread {
private final NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
final Allocator byteBlockAllocator;
final IntBlockPool.Allocator intBlockAllocator;
private final LiveIndexWriterConfig indexWriterConfig;
public DocumentsWriterPerThread(Directory directory, DocumentsWriter parent,
@@ -203,6 +204,7 @@ class DocumentsWriterPerThread {
this.parent = parent;
this.fieldInfos = fieldInfos;
this.writer = parent.indexWriter;
this.indexWriterConfig = parent.indexWriterConfig;
this.infoStream = parent.infoStream;
this.codec = parent.codec;
this.docState = new DocState(this, infoStream);
@@ -567,7 +569,7 @@ class DocumentsWriterPerThread {
boolean success = false;
try {
if (writer.useCompoundFile(newSegment)) {
if (indexWriterConfig.getUseCompoundFile()) {
// Now build compound file
Collection<String> oldFiles = IndexWriter.createCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, newSegment.info, context);


@@ -2269,10 +2269,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
}
synchronized boolean useCompoundFile(SegmentInfoPerCommit segmentInfo) throws IOException {
return mergePolicy.useCompoundFile(segmentInfos, segmentInfo);
}
private synchronized void resetMergeExceptions() {
mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
mergeGen++;


@@ -110,6 +110,10 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl
* others to finish. Default value is 8. */
public final static int DEFAULT_MAX_THREAD_STATES = 8;
/** Default value for compound file system for newly written segments
* (set to <code>true</code>). For batch indexing with very large
* RAM buffers, use <code>false</code>. */
public final static boolean DEFAULT_USE_COMPOUND_FILE_SYSTEM = true;
/**
* Sets the default (for any instance) maximum time to wait for a write lock
* (in milliseconds).
@@ -540,5 +544,9 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cl
public IndexWriterConfig setTermIndexInterval(int interval) {
return (IndexWriterConfig) super.setTermIndexInterval(interval);
}
public IndexWriterConfig setUseCompoundFile(boolean useCompoundFile) {
return (IndexWriterConfig) super.setUseCompoundFile(useCompoundFile);
}
}


@@ -98,6 +98,9 @@ public class LiveIndexWriterConfig {
/** {@link Version} that {@link IndexWriter} should emulate. */
protected final Version matchVersion;
/** True if segment flushes should use compound file format */
protected volatile boolean useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
// used by IndexWriterConfig
LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) {
this.analyzer = analyzer;
@@ -110,6 +113,7 @@ public class LiveIndexWriterConfig {
termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
delPolicy = new KeepOnlyLastCommitDeletionPolicy();
commit = null;
useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
openMode = OpenMode.CREATE_OR_APPEND;
similarity = IndexSearcher.getDefaultSimilarity();
mergeScheduler = new ConcurrentMergeScheduler();
@@ -154,6 +158,7 @@ public class LiveIndexWriterConfig {
readerPooling = config.getReaderPooling();
flushPolicy = config.getFlushPolicy();
perThreadHardLimitMB = config.getRAMPerThreadHardLimitMB();
useCompoundFile = config.getUseCompoundFile();
}
/** Returns the default analyzer to use for indexing documents. */
@@ -542,6 +547,33 @@ public class LiveIndexWriterConfig {
return infoStream;
}
/**
* Sets if the {@link IndexWriter} should pack newly written segments in a
* compound file. Default is <code>true</code>.
* <p>
* Use <code>false</code> for batch indexing with very large RAM buffer
* settings.
* </p>
* <p>
* <b>Note: To control compound file usage during segment merges see
* {@link MergePolicy#setNoCFSRatio(double)} and
* {@link MergePolicy#setMaxCFSSegmentSizeMB(double)}. This setting only
* applies to newly created segments.</b>
* </p>
*/
public LiveIndexWriterConfig setUseCompoundFile(boolean useCompoundFile) {
this.useCompoundFile = useCompoundFile;
return this;
}
/**
* Returns <code>true</code> iff the {@link IndexWriter} packs
* newly written segments in a compound file. Default is <code>true</code>.
*/
public boolean getUseCompoundFile() {
return useCompoundFile;
}
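A hedged usage sketch of the two now-independent knobs (the Version constant and
analyzer are assumptions, not part of this patch):

    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);
    iwc.setUseCompoundFile(false);           // no CFS for newly flushed segments
    iwc.getMergePolicy().setNoCFSRatio(0.0); // no CFS for merged segments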
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@@ -567,7 +599,10 @@ public class LiveIndexWriterConfig {
sb.append("indexerThreadPool=").append(getIndexerThreadPool()).append("\n");
sb.append("readerPooling=").append(getReaderPooling()).append("\n");
sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n");
sb.append("useCompoundFile=").append(getUseCompoundFile()).append("\n");
return sb.toString();
}
}


@@ -64,16 +64,9 @@ public abstract class LogMergePolicy extends MergePolicy {
/** Default noCFSRatio. If a merge's size is >= 10% of
* the index, then we disable compound file for it.
* @see #setNoCFSRatio */
* @see MergePolicy#setNoCFSRatio */
public static final double DEFAULT_NO_CFS_RATIO = 0.1;
/** Default maxCFSSegmentSize value allows compound file
* for a segment of any size. The actual file format is
* still subject to noCFSRatio.
* @see #setMaxCFSSegmentSizeMB(double)
*/
public static final long DEFAULT_MAX_CFS_SEGMENT_SIZE = Long.MAX_VALUE;
/** How many segments to merge at a time. */
protected int mergeFactor = DEFAULT_MERGE_FACTOR;
@@ -96,30 +89,14 @@ public abstract class LogMergePolicy extends MergePolicy {
* will never be merged. */
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
/** If the size of the merge segment exceeds this ratio of
* the total index size then it will remain in
* non-compound format even if {@link
* #setUseCompoundFile} is {@code true}. */
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
/** If the size of the merged segment exceeds
* this value then it will not use compound file format. */
protected long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE;
/** If true, we pro-rate a segment's size by the
* percentage of non-deleted documents. */
protected boolean calibrateSizeByDeletes = true;
/** True if new segments (flushed or merged) should use
* the compound file format. Note that large segments
* may sometimes still use non-compound format (see
* {@link #setNoCFSRatio}. */
protected boolean useCompoundFile = true;
/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
public LogMergePolicy() {
super();
super(DEFAULT_NO_CFS_RATIO, MergePolicy.DEFAULT_MAX_CFS_SEGMENT_SIZE);
}
/** Returns true if {@code LMP} is enabled in {@link
@@ -129,25 +106,6 @@ public abstract class LogMergePolicy extends MergePolicy {
return w != null && w.infoStream.isEnabled("LMP");
}
/** Returns current {@code noCFSRatio}.
*
* @see #setNoCFSRatio */
public double getNoCFSRatio() {
return noCFSRatio;
}
/** If a merged segment will be more than this percentage
* of the total size of the index, leave the segment as
* non-compound file even if compound file is enabled.
* Set to 1.0 to always use CFS regardless of merge
* size. */
public void setNoCFSRatio(double noCFSRatio) {
if (noCFSRatio < 0.0 || noCFSRatio > 1.0) {
throw new IllegalArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + noCFSRatio);
}
this.noCFSRatio = noCFSRatio;
}
/** Print a debug message to {@link IndexWriter}'s {@code
* infoStream}. */
protected void message(String message) {
@@ -178,39 +136,6 @@ public abstract class LogMergePolicy extends MergePolicy {
this.mergeFactor = mergeFactor;
}
// Javadoc inherited
@Override
public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
if (!getUseCompoundFile()) {
return false;
}
long mergedInfoSize = size(mergedInfo);
if (mergedInfoSize > maxCFSSegmentSize) {
return false;
}
if (getNoCFSRatio() >= 1.0) {
return true;
}
long totalSize = 0;
for (SegmentInfoPerCommit info : infos) {
totalSize += size(info);
}
return mergedInfoSize <= getNoCFSRatio() * totalSize;
}
/** Sets whether compound file format should be used for
* newly flushed and newly merged segments. */
public void setUseCompoundFile(boolean useCompoundFile) {
this.useCompoundFile = useCompoundFile;
}
/** Returns true if newly flushed and newly merge segments
* are written in compound file format. @see
* #setUseCompoundFile */
public boolean getUseCompoundFile() {
return useCompoundFile;
}
/** Sets whether the segment size should be calibrated by
* the number of deletes when choosing segments for merge. */
public void setCalibrateSizeByDeletes(boolean calibrateSizeByDeletes) {
@@ -226,9 +151,6 @@ public abstract class LogMergePolicy extends MergePolicy {
@Override
public void close() {}
/** Return the size of the provided {@link
* SegmentInfoPerCommit}. */
abstract protected long size(SegmentInfoPerCommit info) throws IOException;
/** Return the number of documents in the provided {@link
* SegmentInfoPerCommit}, pro-rated by percentage of
@@ -249,15 +171,10 @@ public abstract class LogMergePolicy extends MergePolicy {
* non-deleted documents if {@link
* #setCalibrateSizeByDeletes} is set. */
protected long sizeBytes(SegmentInfoPerCommit info) throws IOException {
long byteSize = info.sizeInBytes();
if (calibrateSizeByDeletes) {
int delCount = writer.get().numDeletedDocs(info);
double delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((float)delCount / (float)info.info.getDocCount()));
assert delRatio <= 1.0;
return (info.info.getDocCount() <= 0 ? byteSize : (long)(byteSize * (1.0 - delRatio)));
} else {
return byteSize;
return super.size(info);
}
return info.sizeInBytes();
}
/** Returns true if the number of segments eligible for
@@ -282,19 +199,6 @@ public abstract class LogMergePolicy extends MergePolicy {
(numToMerge != 1 || !segmentIsOriginal || isMerged(mergeInfo));
}
/** Returns true if this single info is already fully merged (has no
* pending norms or deletes, is in the same dir as the
* writer, and matches the current compound file setting */
protected boolean isMerged(SegmentInfoPerCommit info)
throws IOException {
IndexWriter w = writer.get();
assert w != null;
boolean hasDeletions = w.numDeletedDocs(info) > 0;
return !hasDeletions &&
info.info.dir == w.getDirectory() &&
(info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
}
/**
* Returns the merges necessary to merge the index, taking the max merge
* size or max merge docs into consideration. This method attempts to respect
@@ -726,29 +630,10 @@ public abstract class LogMergePolicy extends MergePolicy {
sb.append("maxMergeSizeForForcedMerge=").append(maxMergeSizeForForcedMerge).append(", ");
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
sb.append("noCFSRatio=").append(noCFSRatio);
sb.append("]");
return sb.toString();
}
/** Returns the largest size allowed for a compound file segment */
public final double getMaxCFSSegmentSizeMB() {
return maxCFSSegmentSize/1024/1024.;
}
/** If a merged segment will be more than this value,
* leave the segment as
* non-compound file even if compound file is enabled.
* Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
* to always use CFS regardless of merge size. */
public final void setMaxCFSSegmentSizeMB(double v) {
if (v < 0.0) {
throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
}
v *= 1024 * 1024;
this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
}
}


@@ -57,7 +57,6 @@ import org.apache.lucene.util.SetOnce;
*
* @lucene.experimental
*/
public abstract class MergePolicy implements java.io.Closeable, Cloneable {
/** A map of doc IDs. */
@@ -361,9 +360,29 @@ public abstract class MergePolicy implements java.io.Closeable, Cloneable {
super(message);
}
}
/**
* Default ratio for compound file system usage. Set to <tt>1.0</tt>, i.e. always use
* the compound file system.
*/
protected static final double DEFAULT_NO_CFS_RATIO = 1.0;
/**
* Default maximum segment size that may use the compound file system. Set to {@link Long#MAX_VALUE}.
*/
protected static final long DEFAULT_MAX_CFS_SEGMENT_SIZE = Long.MAX_VALUE;
/** {@link IndexWriter} that contains this instance. */
protected SetOnce<IndexWriter> writer;
/** If the size of the merged segment exceeds this ratio of
* the total index size then it will remain in
* non-compound format */
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
/** If the size of the merged segment exceeds
* this value then it will not use compound file format. */
protected long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE;
@Override
public MergePolicy clone() {
@@ -384,7 +403,18 @@ public abstract class MergePolicy implements java.io.Closeable, Cloneable {
* {@link #setIndexWriter(IndexWriter)}.
*/
public MergePolicy() {
this(DEFAULT_NO_CFS_RATIO, DEFAULT_MAX_CFS_SEGMENT_SIZE);
}
/**
* Creates a new merge policy instance with the given settings for noCFSRatio
* and maxCFSSegmentSize. This constructor should be used by subclasses whose
* defaults differ from those of {@link MergePolicy}.
*/
protected MergePolicy(double defaultNoCFSRatio, long defaultMaxCFSSegmentSize) {
writer = new SetOnce<IndexWriter>();
this.noCFSRatio = defaultNoCFSRatio;
this.maxCFSSegmentSize = defaultMaxCFSSegmentSize;
}
/**
@@ -451,12 +481,91 @@ public abstract class MergePolicy implements java.io.Closeable, Cloneable {
@Override
public abstract void close();
/**
* Returns true if a new segment (regardless of its origin) should use the compound file format.
* Returns true if a new segment (regardless of its origin) should use the
* compound file format. The default implementation returns <code>true</code>
* iff the size of the given mergedInfo is less than or equal to
* {@link #getMaxCFSSegmentSizeMB()} and less than or equal to the total
* index size multiplied by {@link #getNoCFSRatio()}; otherwise it returns <code>false</code>.
*/
public abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfoPerCommit newSegment) throws IOException;
public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
if (getNoCFSRatio() == 0.0) {
return false;
}
long mergedInfoSize = size(mergedInfo);
if (mergedInfoSize > maxCFSSegmentSize) {
return false;
}
if (getNoCFSRatio() >= 1.0) {
return true;
}
long totalSize = 0;
for (SegmentInfoPerCommit info : infos) {
totalSize += size(info);
}
return mergedInfoSize <= getNoCFSRatio() * totalSize;
}
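A worked sketch of this default rule with hypothetical sizes (the values are
illustrative, not from the patch):

    long maxCFSSegmentSize = Long.MAX_VALUE; // no size cap
    double noCFSRatio = 0.1;                 // CFS only for segments <= 10% of the index
    long totalSize = 100L << 20;             // 100 MB total index size
    long mergedInfoSize = 8L << 20;          // 8 MB merged segment
    boolean useCFS = noCFSRatio > 0.0
        && mergedInfoSize <= maxCFSSegmentSize
        && (noCFSRatio >= 1.0 || mergedInfoSize <= noCFSRatio * totalSize);
    // true here: 8 MB <= 0.1 * 100 MB; a 20 MB merged segment would get false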
/** Return the byte size of the provided {@link
* SegmentInfoPerCommit}, pro-rated by the percentage of
* non-deleted documents. */
protected long size(SegmentInfoPerCommit info) throws IOException {
long byteSize = info.sizeInBytes();
int delCount = writer.get().numDeletedDocs(info);
double delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((float)delCount / (float)info.info.getDocCount()));
assert delRatio <= 1.0;
return (info.info.getDocCount() <= 0 ? byteSize : (long)(byteSize * (1.0 - delRatio)));
}
/** Returns true if this single info is already fully merged (has no
* pending deletes, is in the same dir as the
* writer, and matches the current compound file setting). */
protected final boolean isMerged(SegmentInfoPerCommit info) {
IndexWriter w = writer.get();
assert w != null;
boolean hasDeletions = w.numDeletedDocs(info) > 0;
return !hasDeletions &&
info.info.dir == w.getDirectory() &&
((noCFSRatio > 0.0 && noCFSRatio < 1.0) || maxCFSSegmentSize < Long.MAX_VALUE);
}
/** Returns current {@code noCFSRatio}.
*
* @see #setNoCFSRatio */
public final double getNoCFSRatio() {
return noCFSRatio;
}
/** If a merged segment will be more than this percentage
* of the total size of the index, leave the segment as
* non-compound file even if compound file is enabled.
* Set to 1.0 to always use CFS regardless of merge
* size. */
public final void setNoCFSRatio(double noCFSRatio) {
if (noCFSRatio < 0.0 || noCFSRatio > 1.0) {
throw new IllegalArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + noCFSRatio);
}
this.noCFSRatio = noCFSRatio;
}
/** Returns the largest size allowed for a compound file segment */
public final double getMaxCFSSegmentSizeMB() {
return maxCFSSegmentSize/1024/1024.;
}
/** If a merged segment will be more than this value,
* leave the segment as
* non-compound file even if compound file is enabled.
* Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
* to always use CFS regardless of merge size. */
public final void setMaxCFSSegmentSizeMB(double v) {
if (v < 0.0) {
throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
}
v *= 1024 * 1024;
this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
}
/**
* MergeTrigger is passed to
* {@link MergePolicy#findMerges(MergeTrigger, SegmentInfos)} to indicate the


@@ -20,8 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.MergePolicy.MergeTrigger;
import org.apache.lucene.index.MergePolicy.MergeSpecification;
/**
* A {@link MergePolicy} which never returns merges to execute (hence it's
@@ -49,6 +47,7 @@ public final class NoMergePolicy extends MergePolicy {
private final boolean useCompoundFile;
private NoMergePolicy(boolean useCompoundFile) {
super(useCompoundFile ? 1.0 : 0.0, 0);
// prevent instantiation
this.useCompoundFile = useCompoundFile;
}
@@ -71,6 +70,11 @@ public final class NoMergePolicy extends MergePolicy {
@Override
public void setIndexWriter(IndexWriter writer) {}
@Override
protected long size(SegmentInfoPerCommit info) throws IOException {
return Long.MAX_VALUE;
}
@Override
public String toString() {


@@ -27,9 +27,6 @@ import java.util.Comparator;
import java.util.List;
import java.util.ArrayList;
import org.apache.lucene.index.MergePolicy.MergeTrigger;
/**
* Merges segments of approximately equal size, subject to
* an allowed number of segments per tier. This is similar
@@ -76,7 +73,11 @@ import org.apache.lucene.index.MergePolicy.MergeTrigger;
// maybe CMS should do so)
public class TieredMergePolicy extends MergePolicy {
/** Default noCFSRatio. If a merge's size is >= 10% of
* the index, then we disable compound file for it.
* @see MergePolicy#setNoCFSRatio */
public static final double DEFAULT_NO_CFS_RATIO = 0.1;
private int maxMergeAtOnce = 10;
private long maxMergedSegmentBytes = 5*1024*1024*1024L;
private int maxMergeAtOnceExplicit = 30;
@@ -84,14 +85,12 @@ public class TieredMergePolicy extends MergePolicy {
private long floorSegmentBytes = 2*1024*1024L;
private double segsPerTier = 10.0;
private double forceMergeDeletesPctAllowed = 10.0;
private boolean useCompoundFile = true;
private double noCFSRatio = 0.1;
private long maxCFSSegmentSize = Long.MAX_VALUE;
private double reclaimDeletesWeight = 2.0;
/** Sole constructor, setting all settings to their
* defaults. */
public TieredMergePolicy() {
super(DEFAULT_NO_CFS_RATIO, MergePolicy.DEFAULT_MAX_CFS_SEGMENT_SIZE);
}
/** Maximum number of segments to be merged at a time
@@ -233,41 +232,6 @@ public class TieredMergePolicy extends MergePolicy {
return segsPerTier;
}
/** Sets whether compound file format should be used for
* newly flushed and newly merged segments. Default
* true. */
public TieredMergePolicy setUseCompoundFile(boolean useCompoundFile) {
this.useCompoundFile = useCompoundFile;
return this;
}
/** Returns the current useCompoundFile setting.
*
* @see #setUseCompoundFile */
public boolean getUseCompoundFile() {
return useCompoundFile;
}
/** If a merged segment will be more than this percentage
* of the total size of the index, leave the segment as
* non-compound file even if compound file is enabled.
* Set to 1.0 to always use CFS regardless of merge
* size. Default is 0.1. */
public TieredMergePolicy setNoCFSRatio(double noCFSRatio) {
if (noCFSRatio < 0.0 || noCFSRatio > 1.0) {
throw new IllegalArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + noCFSRatio);
}
this.noCFSRatio = noCFSRatio;
return this;
}
/** Returns the current noCFSRatio setting.
*
* @see #setNoCFSRatio */
public double getNoCFSRatio() {
return noCFSRatio;
}
private class SegmentByteSizeDescending implements Comparator<SegmentInfoPerCommit> {
@Override
public int compare(SegmentInfoPerCommit o1, SegmentInfoPerCommit o2) {
@@ -636,47 +600,10 @@ public class TieredMergePolicy extends MergePolicy {
return spec;
}
@Override
public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
if (!getUseCompoundFile()) {
return false;
}
long mergedInfoSize = size(mergedInfo);
if (mergedInfoSize > maxCFSSegmentSize) {
return false;
}
if (getNoCFSRatio() >= 1.0) {
return true;
}
long totalSize = 0;
for (SegmentInfoPerCommit info : infos) {
totalSize += size(info);
}
return mergedInfoSize <= getNoCFSRatio() * totalSize;
}
@Override
public void close() {
}
private boolean isMerged(SegmentInfoPerCommit info) {
IndexWriter w = writer.get();
assert w != null;
boolean hasDeletions = w.numDeletedDocs(info) > 0;
return !hasDeletions &&
info.info.dir == w.getDirectory() &&
(info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0 || maxCFSSegmentSize < Long.MAX_VALUE);
}
// Segment size in bytes, pro-rated by % deleted
private long size(SegmentInfoPerCommit info) throws IOException {
final long byteSize = info.sizeInBytes();
final int delCount = writer.get().numDeletedDocs(info);
final double delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((double)delCount / (double)info.info.getDocCount()));
assert delRatio <= 1.0;
return (long) (byteSize * (1.0-delRatio));
}
private long floorSize(long bytes) {
return Math.max(floorSegmentBytes, bytes);
}
@@ -699,28 +626,8 @@ public class TieredMergePolicy extends MergePolicy {
sb.append("floorSegmentMB=").append(floorSegmentBytes/1024/1024.).append(", ");
sb.append("forceMergeDeletesPctAllowed=").append(forceMergeDeletesPctAllowed).append(", ");
sb.append("segmentsPerTier=").append(segsPerTier).append(", ");
sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
sb.append("noCFSRatio=").append(noCFSRatio);
return sb.toString();
}
/** Returns the largest size allowed for a compound file segment */
public final double getMaxCFSSegmentSizeMB() {
return maxCFSSegmentSize/1024/1024.;
}
/** If a merged segment will be more than this value,
* leave the segment as
* non-compound file even if compound file is enabled.
* Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
* to always use CFS regardless of merge size. */
public final TieredMergePolicy setMaxCFSSegmentSizeMB(double v) {
if (v < 0.0) {
throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
}
v *= 1024 * 1024;
this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
return this;
}
}


@@ -112,10 +112,7 @@ public class TestSearch extends LuceneTestCase {
Analyzer analyzer = new MockAnalyzer(random);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
MergePolicy mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
}
mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
IndexWriter writer = new IndexWriter(directory, conf);
String[] docs = {


@@ -72,9 +72,7 @@ public class TestSearchForDuplicates extends LuceneTestCase {
Analyzer analyzer = new MockAnalyzer(random);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
final MergePolicy mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setUseCompoundFile(useCompoundFiles);
}
mp.setNoCFSRatio(useCompoundFiles ? 1.0 : 0.0);
IndexWriter writer = new IndexWriter(directory, conf);
if (VERBOSE) {
System.out.println("TEST: now build index MAX_DOCS=" + MAX_DOCS);


@@ -49,6 +49,7 @@ public class TestCompressingStoredFieldsFormat extends BaseStoredFieldsFormatTes
iwConf.setCodec(CompressingCodec.randomInstance(random()));
// disable CFS because this test checks file names
iwConf.setMergePolicy(newLogMergePolicy(false));
iwConf.setUseCompoundFile(false);
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
final Document validDoc = new Document();


@@ -58,7 +58,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
private IndexWriter newWriter(Directory dir, IndexWriterConfig conf)
throws IOException {
LogDocMergePolicy logByteSizeMergePolicy = new LogDocMergePolicy();
logByteSizeMergePolicy.setUseCompoundFile(false); // make sure we use plain
logByteSizeMergePolicy.setNoCFSRatio(0.0); // make sure we use plain
// files
conf.setMergePolicy(logByteSizeMergePolicy);
@@ -146,7 +146,7 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
.setOpenMode(OpenMode.APPEND).setCodec(codec);
//((LogMergePolicy) iwconf.getMergePolicy()).setUseCompoundFile(false);
//((LogMergePolicy) iwconf.getMergePolicy()).setNoCFSRatio(0.0);
//((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10);
iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);


@@ -597,7 +597,7 @@ public class TestAddIndexes extends LuceneTestCase {
Directory dir = newDirectory();
LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
lmp.setUseCompoundFile(false);
lmp.setNoCFSRatio(0.0);
lmp.setMergeFactor(100);
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random()))
@@ -626,7 +626,7 @@ public class TestAddIndexes extends LuceneTestCase {
Directory dir2 = newDirectory();
lmp = new LogByteSizeMergePolicy();
lmp.setMinMergeMB(0.0001);
lmp.setUseCompoundFile(false);
lmp.setNoCFSRatio(0.0);
lmp.setMergeFactor(4);
writer = new IndexWriter(dir2, newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random()))
@@ -1095,7 +1095,7 @@ public class TestAddIndexes extends LuceneTestCase {
Directory dir = new MockDirectoryWrapper(random(), new RAMDirectory());
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy(true));
LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
MergePolicy lmp = conf.getMergePolicy();
// Force creation of CFS:
lmp.setNoCFSRatio(1.0);
lmp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);


@@ -573,8 +573,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
_TestUtil.rmDir(indexDir);
Directory dir = newFSDirectory(indexDir);
LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
mp.setUseCompoundFile(doCFS);
mp.setNoCFSRatio(1.0);
mp.setNoCFSRatio(doCFS ? 1.0 : 0.0);
mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
// TODO: remove randomness
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
@@ -593,8 +592,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
if (!fullyMerged) {
// open fresh writer so we get no prx file in the added segment
mp = new LogByteSizeMergePolicy();
mp.setUseCompoundFile(doCFS);
mp.setNoCFSRatio(1.0);
mp.setNoCFSRatio(doCFS ? 1.0 : 0.0);
// TODO: remove randomness
conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
.setMaxBufferedDocs(10).setMergePolicy(mp);
@@ -626,7 +624,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
try {
Directory dir = newFSDirectory(outputDir);
LogMergePolicy mergePolicy = newLogMergePolicy(true, 10);
MergePolicy mergePolicy = newLogMergePolicy(true, 10);
// This test expects all of its segments to be in CFS:
mergePolicy.setNoCFSRatio(1.0);
@@ -637,7 +635,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).
setMaxBufferedDocs(-1).
setRAMBufferSizeMB(16.0).
setMergePolicy(mergePolicy)
setMergePolicy(mergePolicy).setUseCompoundFile(true)
);
for(int i=0;i<35;i++) {
addDoc(writer, i);
@@ -649,7 +647,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
writer = new IndexWriter(
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES)
.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES).setUseCompoundFile(true)
);
Term searchTerm = new Term("id", "7");
writer.deleteDocuments(searchTerm);


@@ -225,9 +225,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
new MockAnalyzer(random()))
.setIndexDeletionPolicy(new ExpirationTimeDeletionPolicy(dir, SECONDS));
MergePolicy mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setUseCompoundFile(true);
}
mp.setNoCFSRatio(1.0);
IndexWriter writer = new IndexWriter(dir, conf);
ExpirationTimeDeletionPolicy policy = (ExpirationTimeDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
Map<String,String> commitData = new HashMap<String,String>();
@@ -246,9 +244,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
new MockAnalyzer(random())).setOpenMode(
OpenMode.APPEND).setIndexDeletionPolicy(policy);
mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setUseCompoundFile(true);
}
mp.setNoCFSRatio(1.0);
writer = new IndexWriter(dir, conf);
policy = (ExpirationTimeDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
for(int j=0;j<17;j++) {
@@ -326,9 +322,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
.setMaxBufferedDocs(10)
.setMergeScheduler(new SerialMergeScheduler());
MergePolicy mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
}
mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
IndexWriter writer = new IndexWriter(dir, conf);
KeepAllDeletionPolicy policy = (KeepAllDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
for(int i=0;i<107;i++) {
@@ -347,9 +341,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
new MockAnalyzer(random())).setOpenMode(
OpenMode.APPEND).setIndexDeletionPolicy(policy);
mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
}
mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
if (VERBOSE) {
System.out.println("TEST: open writer for forceMerge");
}
@@ -526,9 +518,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
.setIndexDeletionPolicy(new KeepNoneOnInitDeletionPolicy())
.setMaxBufferedDocs(10);
MergePolicy mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
}
mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
IndexWriter writer = new IndexWriter(dir, conf);
KeepNoneOnInitDeletionPolicy policy = (KeepNoneOnInitDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
for(int i=0;i<107;i++) {
@@ -539,9 +529,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
.setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(policy);
mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setUseCompoundFile(true);
}
mp.setNoCFSRatio(1.0);
writer = new IndexWriter(dir, conf);
policy = (KeepNoneOnInitDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
writer.forceMerge(1);
@@ -581,9 +569,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
.setIndexDeletionPolicy(policy)
.setMaxBufferedDocs(10);
MergePolicy mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
}
mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
IndexWriter writer = new IndexWriter(dir, conf);
policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
for(int i=0;i<17;i++) {
@@ -642,9 +628,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
.setIndexDeletionPolicy(new KeepLastNDeletionPolicy(N))
.setMaxBufferedDocs(10);
MergePolicy mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
}
mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
IndexWriter writer = new IndexWriter(dir, conf);
KeepLastNDeletionPolicy policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
writer.close();
@@ -658,9 +642,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
.setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(policy)
.setMaxBufferedDocs(10);
mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
}
mp.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
writer = new IndexWriter(dir, conf);
policy = (KeepLastNDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
for(int j=0;j<17;j++) {


@@ -61,7 +61,7 @@ public class TestFieldsReader extends LuceneTestCase {
}
dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy());
((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false);
conf.getMergePolicy().setNoCFSRatio(0.0);
IndexWriter writer = new IndexWriter(dir, conf);
writer.addDocument(testDoc);
writer.close();


@@ -44,7 +44,7 @@ public class TestIndexFileDeleter extends LuceneTestCase {
((MockDirectoryWrapper)dir).setPreventDoubleWrite(false);
}
LogMergePolicy mergePolicy = newLogMergePolicy(true, 10);
MergePolicy mergePolicy = newLogMergePolicy(true, 10);
// This test expects all of its segments to be in CFS
mergePolicy.setNoCFSRatio(1.0);
@@ -54,14 +54,15 @@
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).
setMaxBufferedDocs(10).
setMergePolicy(mergePolicy)
setMergePolicy(mergePolicy).setUseCompoundFile(true)
);
int i;
for(i=0;i<35;i++) {
addDoc(writer, i);
}
((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(false);
writer.getConfig().getMergePolicy().setNoCFSRatio(0.0);
writer.getConfig().setUseCompoundFile(false);
for(;i<45;i++) {
addDoc(writer, i);
}
@@ -71,7 +72,7 @@ public class TestIndexFileDeleter extends LuceneTestCase {
writer = new IndexWriter(
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).
setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES)
setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES).setUseCompoundFile(true)
);
Term searchTerm = new Term("id", "7");
writer.deleteDocuments(searchTerm);


@@ -704,7 +704,7 @@ public class TestIndexWriter extends LuceneTestCase {
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()));
//LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
//lmp.setMergeFactor(2);
//lmp.setUseCompoundFile(false);
//lmp.setNoCFSRatio(0.0);
Document doc = new Document();
String contents = "aa bb cc dd ee ff gg hh ii jj kk";
@@ -732,7 +732,7 @@ public class TestIndexWriter extends LuceneTestCase {
if (0 == i % 4) {
writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())));
//LogMergePolicy lmp2 = (LogMergePolicy) writer.getConfig().getMergePolicy();
//lmp2.setUseCompoundFile(false);
//lmp2.setNoCFSRatio(0.0);
writer.forceMerge(1);
writer.close();
}
@@ -1339,7 +1339,7 @@ public class TestIndexWriter extends LuceneTestCase {
for(int iter=0;iter<2;iter++) {
Directory dir = newMockDirectory(); // relies on windows semantics
LogMergePolicy mergePolicy = newLogMergePolicy(true);
MergePolicy mergePolicy = newLogMergePolicy(true);
// This test expects all of its segments to be in CFS
mergePolicy.setNoCFSRatio(1.0);
@@ -1348,7 +1348,7 @@ public class TestIndexWriter extends LuceneTestCase {
IndexWriter w = new IndexWriter(
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).
setMergePolicy(mergePolicy)
setMergePolicy(mergePolicy).setUseCompoundFile(true)
);
Document doc = new Document();
doc.add(newTextField("field", "go", Field.Store.NO));
@@ -1468,7 +1468,7 @@ public class TestIndexWriter extends LuceneTestCase {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random()))
.setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()));
.setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()).setUseCompoundFile(false));
String[] files = dir.listAll();
// Creating over empty dir should not create any files,
@@ -1550,7 +1550,7 @@ public class TestIndexWriter extends LuceneTestCase {
Directory dir = newDirectory();
IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setRAMBufferSizeMB(0.01).setMergePolicy(newLogMergePolicy()));
((LogMergePolicy) indexWriter.getConfig().getMergePolicy()).setUseCompoundFile(false);
indexWriter.getConfig().getMergePolicy().setNoCFSRatio(0.0);
String BIG="alskjhlaksjghlaksjfhalksvjepgjioefgjnsdfjgefgjhelkgjhqewlrkhgwlekgrhwelkgjhwelkgrhwlkejg";
BIG=BIG+BIG+BIG+BIG;


@@ -78,6 +78,7 @@ public class TestIndexWriterConfig extends LuceneTestCase {
assertEquals(IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB, conf.getRAMPerThreadHardLimitMB());
assertEquals(Codec.getDefault(), conf.getCodec());
assertEquals(InfoStream.getDefault(), conf.getInfoStream());
assertEquals(IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM, conf.getUseCompoundFile());
// Sanity check - validate that all getters are covered.
Set<String> getters = new HashSet<String>();
getters.add("getAnalyzer");
@@ -104,6 +105,7 @@ public class TestIndexWriterConfig extends LuceneTestCase {
getters.add("getRAMPerThreadHardLimitMB");
getters.add("getCodec");
getters.add("getInfoStream");
getters.add("getUseCompoundFile");
for (Method m : IndexWriterConfig.class.getDeclaredMethods()) {
if (m.getDeclaringClass() == IndexWriterConfig.class && m.getName().startsWith("get")) {
@@ -188,6 +190,7 @@ public class TestIndexWriterConfig extends LuceneTestCase {
assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
assertEquals(16.0, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, 0.0);
assertEquals(false, IndexWriterConfig.DEFAULT_READER_POOLING);
assertEquals(true, IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM);
assertEquals(DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR);
}
@@ -372,14 +375,13 @@ public class TestIndexWriterConfig extends LuceneTestCase {
iwc.setMergePolicy(newLogMergePolicy(true));
// Start false:
((LogMergePolicy) iwc.getMergePolicy()).setUseCompoundFile(false);
iwc.getMergePolicy().setNoCFSRatio(0.0);
IndexWriter w = new IndexWriter(dir, iwc);
// Change to true:
LogMergePolicy lmp = ((LogMergePolicy) w.getConfig().getMergePolicy());
MergePolicy lmp = w.getConfig().getMergePolicy();
lmp.setNoCFSRatio(1.0);
lmp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
lmp.setUseCompoundFile(true);
Document doc = new Document();
doc.add(newStringField("field", "foo", Store.NO));


@@ -768,8 +768,8 @@ public class TestIndexWriterDelete extends LuceneTestCase {
IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(2).setReaderPooling(false).setMergePolicy(newLogMergePolicy()));
LogMergePolicy lmp = (LogMergePolicy) modifier.getConfig().getMergePolicy();
lmp.setUseCompoundFile(true);
MergePolicy lmp = modifier.getConfig().getMergePolicy();
lmp.setNoCFSRatio(1.0);
dir.failOn(failure.reset());


@@ -1143,9 +1143,9 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
writer = new IndexWriter(
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).
setMergePolicy(newLogMergePolicy(true))
setMergePolicy(newLogMergePolicy(true)).setUseCompoundFile(true)
);
LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
MergePolicy lmp = writer.getConfig().getMergePolicy();
// Force creation of CFS:
lmp.setNoCFSRatio(1.0);
lmp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);


@@ -271,7 +271,7 @@ public class TestIndexWriterMergePolicy extends LuceneTestCase {
assertSetters(new LogDocMergePolicy());
}
private void assertSetters(LogMergePolicy lmp) {
private void assertSetters(MergePolicy lmp) {
lmp.setMaxCFSSegmentSizeMB(2.0);
assertEquals(2.0, lmp.getMaxCFSSegmentSizeMB(), EPSILON);


@@ -65,8 +65,8 @@ public class TestNoMergePolicy extends LuceneTestCase {
if (m.getName().equals("clone")) {
continue;
}
if (m.getDeclaringClass() != Object.class) {
assertTrue(m + " is not overridden !", m.getDeclaringClass() == NoMergePolicy.class);
if (m.getDeclaringClass() != Object.class && !Modifier.isFinal(m.getModifiers())) {
assertTrue(m + " is not overridden ! ", m.getDeclaringClass() == NoMergePolicy.class);
}
}
}


@@ -193,7 +193,7 @@ public class TestOmitNorms extends LuceneTestCase {
TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy()));
LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
lmp.setMergeFactor(2);
lmp.setUseCompoundFile(false);
lmp.setNoCFSRatio(0.0);
Document d = new Document();
FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);


@@ -194,7 +194,7 @@ public class TestOmitPositions extends LuceneTestCase {
TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy()));
LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
lmp.setMergeFactor(2);
lmp.setUseCompoundFile(false);
lmp.setNoCFSRatio(0.0);
Document d = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);


@@ -215,7 +215,7 @@ public class TestOmitTf extends LuceneTestCase {
TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3).setMergePolicy(newLogMergePolicy()));
LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
lmp.setMergeFactor(2);
lmp.setUseCompoundFile(false);
lmp.setNoCFSRatio(0.0);
Document d = new Document();
Field f1 = newField("f1", "This field has term freqs", omitType);


@@ -151,7 +151,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
0.1).setMaxBufferedDocs(maxBufferedDocs).setMergePolicy(newLogMergePolicy()), new YieldTestPoint());
w.commit();
LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
lmp.setUseCompoundFile(false);
lmp.setNoCFSRatio(0.0);
lmp.setMergeFactor(mergeFactor);
/***
w.setMaxMergeDocs(Integer.MAX_VALUE);
@@ -202,7 +202,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
.setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(maxThreadStates))
.setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy()), new YieldTestPoint());
LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
lmp.setUseCompoundFile(false);
lmp.setNoCFSRatio(0.0);
lmp.setMergeFactor(mergeFactor);
threads = new IndexingThread[nThreads];


@@ -93,7 +93,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MyAnalyzer()).
setMaxBufferedDocs(-1).
setMergePolicy(newLogMergePolicy(false, 10))
setMergePolicy(newLogMergePolicy(false, 10)).setUseCompoundFile(false)
);
Document doc = new Document();


@@ -40,7 +40,7 @@ public class TestTryDelete extends LuceneTestCase
private static IndexWriter getWriter (Directory directory)
throws IOException
{
LogMergePolicy policy = new LogByteSizeMergePolicy();
MergePolicy policy = new LogByteSizeMergePolicy();
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random()));
conf.setMergePolicy(policy);


@@ -41,7 +41,7 @@ public class TestMultiThreadTermVectors extends LuceneTestCase {
super.setUp();
directory = newDirectory();
IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
//writer.setUseCompoundFile(false);
//writer.setNoCFSRatio(0.0);
//writer.infoStream = System.out;
FieldType customType = new FieldType(TextField.TYPE_STORED);
customType.setTokenized(false);


@@ -50,7 +50,7 @@ public class TestTermVectors extends LuceneTestCase {
public static void beforeClass() throws Exception {
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)).setMergePolicy(newLogMergePolicy()));
//writer.setUseCompoundFile(true);
//writer.setNoCFSRatio(1.0);
//writer.infoStream = System.out;
for (int i = 0; i < 1000; i++) {
Document doc = new Document();


@@ -54,7 +54,7 @@ public class TestFileSwitchDirectory extends LuceneTestCase {
IndexWriter writer = new IndexWriter(
fsd,
new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).
setMergePolicy(newLogMergePolicy(false)).setCodec(Codec.forName("Lucene40"))
setMergePolicy(newLogMergePolicy(false)).setCodec(Codec.forName("Lucene40")).setUseCompoundFile(false)
);
TestIndexWriterReader.createIndexNoClose(true, "ram", writer);
IndexReader reader = DirectoryReader.open(writer, true);


@@ -41,7 +41,7 @@ public class TestIndexSplitter extends LuceneTestCase {
((MockDirectoryWrapper)fsDir).setAssertNoUnrefencedFilesOnClose(false);
}
LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
MergePolicy mergePolicy = new LogByteSizeMergePolicy();
mergePolicy.setNoCFSRatio(1.0);
mergePolicy.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
IndexWriter iw = new IndexWriter(


@@ -94,7 +94,7 @@ public abstract class BaseStoredFieldsFormatTestCase extends LuceneTestCase {
Directory dir = newDirectory();
Random rand = random();
RandomIndexWriter w = new RandomIndexWriter(rand, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(_TestUtil.nextInt(rand, 5, 20)));
//w.w.setUseCompoundFile(false);
//w.w.setNoCFSRatio(0.0);
final int docCount = atLeast(200);
final int fieldCount = _TestUtil.nextInt(rand, 1, 5);


@@ -272,7 +272,7 @@ class DocHelper {
public static SegmentInfoPerCommit writeDoc(Random random, Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException {
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( /* LuceneTestCase.newIndexWriterConfig(random, */
TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity == null ? IndexSearcher.getDefaultSimilarity() : similarity));
//writer.setUseCompoundFile(false);
//writer.setNoCFSRatio(0.0);
writer.addDocument(doc);
writer.commit();
SegmentInfoPerCommit info = writer.newestSegment();


@@ -808,6 +808,7 @@ public abstract class LuceneTestCase extends Assert {
if (rarely(r)) {
c.setMergedSegmentWarmer(new SimpleMergedSegmentWarmer(c.getInfoStream()));
}
c.setUseCompoundFile(r.nextBoolean());
c.setReaderPooling(r.nextBoolean());
c.setReaderTermsIndexDivisor(_TestUtil.nextInt(r, 1, 4));
return c;
@@ -831,20 +832,29 @@ public abstract class LuceneTestCase extends Assert {
public static LogMergePolicy newLogMergePolicy(Random r) {
LogMergePolicy logmp = r.nextBoolean() ? new LogDocMergePolicy() : new LogByteSizeMergePolicy();
logmp.setUseCompoundFile(r.nextBoolean());
logmp.setCalibrateSizeByDeletes(r.nextBoolean());
if (rarely(r)) {
logmp.setMergeFactor(_TestUtil.nextInt(r, 2, 9));
} else {
logmp.setMergeFactor(_TestUtil.nextInt(r, 10, 50));
}
logmp.setUseCompoundFile(r.nextBoolean());
logmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
if (rarely()) {
logmp.setMaxCFSSegmentSizeMB(0.2 + r.nextDouble() * 2.0);
}
configureRandom(r, logmp);
return logmp;
}
private static void configureRandom(Random r, MergePolicy mergePolicy) {
if (r.nextBoolean()) {
mergePolicy.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
} else {
mergePolicy.setNoCFSRatio(r.nextBoolean() ? 1.0 : 0.0);
}
if (rarely()) {
mergePolicy.setMaxCFSSegmentSizeMB(0.2 + r.nextDouble() * 2.0);
} else {
mergePolicy.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
}
}
public static TieredMergePolicy newTieredMergePolicy(Random r) {
TieredMergePolicy tmp = new TieredMergePolicy();
@@ -867,29 +877,25 @@ public abstract class LuceneTestCase extends Assert {
} else {
tmp.setSegmentsPerTier(_TestUtil.nextInt(r, 10, 50));
}
tmp.setUseCompoundFile(r.nextBoolean());
tmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
if (rarely()) {
tmp.setMaxCFSSegmentSizeMB(0.2 + r.nextDouble() * 2.0);
}
configureRandom(r, tmp);
tmp.setReclaimDeletesWeight(r.nextDouble()*4);
return tmp;
}
public static LogMergePolicy newLogMergePolicy(boolean useCFS) {
LogMergePolicy logmp = newLogMergePolicy();
logmp.setUseCompoundFile(useCFS);
public static MergePolicy newLogMergePolicy(boolean useCFS) {
MergePolicy logmp = newLogMergePolicy();
logmp.setNoCFSRatio(useCFS ? 1.0 : 0.0);
return logmp;
}
public static LogMergePolicy newLogMergePolicy(boolean useCFS, int mergeFactor) {
public static MergePolicy newLogMergePolicy(boolean useCFS, int mergeFactor) {
LogMergePolicy logmp = newLogMergePolicy();
logmp.setUseCompoundFile(useCFS);
logmp.setNoCFSRatio(useCFS ? 1.0 : 0.0);
logmp.setMergeFactor(mergeFactor);
return logmp;
}
public static LogMergePolicy newLogMergePolicy(int mergeFactor) {
public static MergePolicy newLogMergePolicy(int mergeFactor) {
LogMergePolicy logmp = newLogMergePolicy();
logmp.setMergeFactor(mergeFactor);
return logmp;


@@ -260,17 +260,6 @@ public class _TestUtil {
}
}
// NOTE: only works for TMP and LMP!!
public static void setUseCompoundFile(MergePolicy mp, boolean v) {
if (mp instanceof TieredMergePolicy) {
((TieredMergePolicy) mp).setUseCompoundFile(v);
} else if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setUseCompoundFile(v);
} else {
throw new IllegalArgumentException("cannot set compound file for MergePolicy " + mp);
}
}
/** start and end are BOTH inclusive */
public static int nextInt(Random r, int start, int end) {
return RandomInts.randomIntBetween(r, start, end);
@@ -766,12 +755,12 @@ public class _TestUtil {
if (mp instanceof LogMergePolicy) {
LogMergePolicy lmp = (LogMergePolicy) mp;
lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor()));
lmp.setUseCompoundFile(true);
lmp.setNoCFSRatio(1.0);
} else if (mp instanceof TieredMergePolicy) {
TieredMergePolicy tmp = (TieredMergePolicy) mp;
tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce()));
tmp.setSegmentsPerTier(Math.min(5, tmp.getSegmentsPerTier()));
tmp.setUseCompoundFile(true);
tmp.setNoCFSRatio(1.0);
}
MergeScheduler ms = w.getConfig().getMergeScheduler();
if (ms instanceof ConcurrentMergeScheduler) {


@@ -217,14 +217,14 @@ public class SolrIndexConfig {
if (maxMergeDocs != -1)
logMergePolicy.setMaxMergeDocs(maxMergeDocs);
logMergePolicy.setUseCompoundFile(useCompoundFile);
logMergePolicy.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
if (mergeFactor != -1)
logMergePolicy.setMergeFactor(mergeFactor);
} else if (policy instanceof TieredMergePolicy) {
TieredMergePolicy tieredMergePolicy = (TieredMergePolicy) policy;
tieredMergePolicy.setUseCompoundFile(useCompoundFile);
tieredMergePolicy.setNoCFSRatio(useCompoundFile ? 1.0 : 0.0);
if (mergeFactor != -1) {
tieredMergePolicy.setMaxMergeAtOnce(mergeFactor);


@@ -40,7 +40,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 {
assertEquals(19, tieredMP.getMaxMergeAtOnceExplicit());
// make sure we apply compoundFile and mergeFactor
assertEquals(false, tieredMP.getUseCompoundFile());
assertEquals(0.0, tieredMP.getNoCFSRatio(), 0.0);
assertEquals(7, tieredMP.getMaxMergeAtOnce());
// make sure we overrode segmentsPerTier (split from maxMergeAtOnce out of mergeFactor)