mirror of
https://github.com/apache/lucene.git
synced 2025-03-03 06:49:38 +00:00
LUCENE-4132: introduce LiveIndexWriterConfig, returned from IndexWriter.getConfig()
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1351225 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
da70cee7ff
commit
a11013a990
@ -10,6 +10,12 @@ New features
|
||||
|
||||
* LUCENE-4108: add replaceTaxonomy to DirectoryTaxonomyWriter, which replaces
|
||||
the taxonomy in place with the given one. (Shai Erera)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-4132: IndexWriter.getConfig() now returns a LiveIndexWriterConfig object
|
||||
which can be used to change the IndexWriter's live settings. IndexWriterConfig
|
||||
is used only for initializing the IndexWriter. (Shai Erera)
|
||||
|
||||
======================= Lucene 4.0.0-ALPHA =======================
|
||||
|
||||
|
@ -133,7 +133,7 @@ final class DocumentsWriter {
|
||||
final DocumentsWriterFlushControl flushControl;
|
||||
|
||||
final Codec codec;
|
||||
DocumentsWriter(Codec codec, IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumbers globalFieldNumbers,
|
||||
DocumentsWriter(Codec codec, LiveIndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumbers globalFieldNumbers,
|
||||
BufferedDeletesStream bufferedDeletesStream) throws IOException {
|
||||
this.codec = codec;
|
||||
this.directory = directory;
|
||||
|
@ -65,10 +65,9 @@ final class DocumentsWriterFlushControl implements MemoryController {
|
||||
private final FlushPolicy flushPolicy;
|
||||
private boolean closed = false;
|
||||
private final DocumentsWriter documentsWriter;
|
||||
private final IndexWriterConfig config;
|
||||
private final LiveIndexWriterConfig config;
|
||||
|
||||
DocumentsWriterFlushControl(DocumentsWriter documentsWriter,
|
||||
IndexWriterConfig config) {
|
||||
DocumentsWriterFlushControl(DocumentsWriter documentsWriter, LiveIndexWriterConfig config) {
|
||||
this.stallControl = new DocumentsWriterStallControl();
|
||||
this.perThreadPool = documentsWriter.perThreadPool;
|
||||
this.flushPolicy = documentsWriter.flushPolicy;
|
||||
|
@ -135,7 +135,7 @@ abstract class DocumentsWriterPerThreadPool implements Cloneable {
|
||||
numThreadStatesActive = 0;
|
||||
}
|
||||
|
||||
void initialize(DocumentsWriter documentsWriter, FieldNumbers globalFieldMap, IndexWriterConfig config) {
|
||||
void initialize(DocumentsWriter documentsWriter, FieldNumbers globalFieldMap, LiveIndexWriterConfig config) {
|
||||
this.documentsWriter.set(documentsWriter); // thread pool is bound to DW
|
||||
this.globalFieldMap.set(globalFieldMap);
|
||||
for (int i = 0; i < threadStates.length; i++) {
|
||||
|
@ -52,7 +52,7 @@ import org.apache.lucene.util.SetOnce;
|
||||
*/
|
||||
abstract class FlushPolicy implements Cloneable {
|
||||
protected SetOnce<DocumentsWriter> writer = new SetOnce<DocumentsWriter>();
|
||||
protected IndexWriterConfig indexWriterConfig;
|
||||
protected LiveIndexWriterConfig indexWriterConfig;
|
||||
|
||||
/**
|
||||
* Called for each delete term. If this is a delete triggered due to an update
|
||||
|
@ -258,7 +258,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||
|
||||
// The instance that was passed to the constructor. It is saved only in order
|
||||
// to allow users to query an IndexWriter's settings.
|
||||
private final IndexWriterConfig config;
|
||||
private final LiveIndexWriterConfig config;
|
||||
|
||||
// The PayloadProcessorProvider to use when segments are merged
|
||||
private PayloadProcessorProvider payloadProcessorProvider;
|
||||
@ -586,11 +586,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||
*/
|
||||
public IndexWriter(Directory d, IndexWriterConfig conf)
|
||||
throws CorruptIndexException, LockObtainFailedException, IOException {
|
||||
if (conf.inUseByIndexWriter.get()) {
|
||||
throw new IllegalStateException("the provided IndexWriterConfig was previously used by a different IndexWriter; please make a new one instead");
|
||||
}
|
||||
config = conf.clone();
|
||||
config.inUseByIndexWriter.set(true);
|
||||
config = new LiveIndexWriterConfig(conf.clone());
|
||||
directory = d;
|
||||
analyzer = config.getAnalyzer();
|
||||
infoStream = config.getInfoStream();
|
||||
@ -757,17 +753,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the private {@link IndexWriterConfig}, cloned
|
||||
* from the {@link IndexWriterConfig} passed to
|
||||
* {@link #IndexWriter(Directory, IndexWriterConfig)}.
|
||||
* <p>
|
||||
* <b>NOTE:</b> some settings may be changed on the
|
||||
* returned {@link IndexWriterConfig}, and will take
|
||||
* effect in the current IndexWriter instance. See the
|
||||
* javadocs for the specific setters in {@link
|
||||
* IndexWriterConfig} for details.
|
||||
* Returns a {@link LiveIndexWriterConfig}, which can be used to query the IndexWriter's
|
||||
* current settings, as well as modify "live" ones.
|
||||
*/
|
||||
public IndexWriterConfig getConfig() {
|
||||
public LiveIndexWriterConfig getConfig() {
|
||||
ensureOpen(false);
|
||||
return config;
|
||||
}
|
||||
|
@ -18,7 +18,6 @@ package org.apache.lucene.index;
|
||||
*/
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
@ -28,17 +27,14 @@ import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.InfoStream;
|
||||
import org.apache.lucene.util.PrintStreamInfoStream;
|
||||
import org.apache.lucene.util.SetOnce;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* Holds all the configuration of {@link IndexWriter}. You
|
||||
* should instantiate this class, call the setters to set
|
||||
* your configuration, then pass it to {@link IndexWriter}.
|
||||
* Note that {@link IndexWriter} makes a private clone; if
|
||||
* you need to subsequently change settings use {@link
|
||||
* IndexWriter#getConfig}.
|
||||
*
|
||||
* Holds all the configuration that is used to create an {@link IndexWriter}.
|
||||
* Once {@link IndexWriter} has been created with this object, changes to this
|
||||
* object will not affect the {@link IndexWriter} instance. For that, use the
|
||||
* {@link LiveIndexWriterConfig} that is returned from {@link IndexWriter#getConfig()}.
|
||||
*
|
||||
* <p>
|
||||
* All setter methods return {@link IndexWriterConfig} to allow chaining
|
||||
* settings conveniently, for example:
|
||||
@ -47,10 +43,12 @@ import org.apache.lucene.util.Version;
|
||||
* IndexWriterConfig conf = new IndexWriterConfig(analyzer);
|
||||
* conf.setter1().setter2();
|
||||
* </pre>
|
||||
*
|
||||
*
|
||||
* @see IndexWriter#getConfig()
|
||||
*
|
||||
* @since 3.1
|
||||
*/
|
||||
public final class IndexWriterConfig implements Cloneable {
|
||||
public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cloneable {
|
||||
|
||||
/**
|
||||
* Specifies the open mode for {@link IndexWriter}.
|
||||
@ -131,33 +129,6 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
return WRITE_LOCK_TIMEOUT;
|
||||
}
|
||||
|
||||
private final Analyzer analyzer;
|
||||
private volatile IndexDeletionPolicy delPolicy;
|
||||
private volatile IndexCommit commit;
|
||||
private volatile OpenMode openMode;
|
||||
private volatile Similarity similarity;
|
||||
private volatile int termIndexInterval; // TODO: this should be private to the codec, not settable here
|
||||
private volatile MergeScheduler mergeScheduler;
|
||||
private volatile long writeLockTimeout;
|
||||
private volatile int maxBufferedDeleteTerms;
|
||||
private volatile double ramBufferSizeMB;
|
||||
private volatile int maxBufferedDocs;
|
||||
private volatile IndexingChain indexingChain;
|
||||
private volatile IndexReaderWarmer mergedSegmentWarmer;
|
||||
private volatile Codec codec;
|
||||
private volatile InfoStream infoStream;
|
||||
private volatile MergePolicy mergePolicy;
|
||||
private volatile DocumentsWriterPerThreadPool indexerThreadPool;
|
||||
private volatile boolean readerPooling;
|
||||
private volatile int readerTermsIndexDivisor;
|
||||
private volatile FlushPolicy flushPolicy;
|
||||
private volatile int perThreadHardLimitMB;
|
||||
|
||||
private Version matchVersion;
|
||||
|
||||
// Used directly by IndexWriter:
|
||||
AtomicBoolean inUseByIndexWriter = new AtomicBoolean();
|
||||
|
||||
/**
|
||||
* Creates a new config with defaults that match the specified
|
||||
* {@link Version} as well as the default {@link
|
||||
@ -170,59 +141,27 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
* {@link LogDocMergePolicy}.
|
||||
*/
|
||||
public IndexWriterConfig(Version matchVersion, Analyzer analyzer) {
|
||||
this.matchVersion = matchVersion;
|
||||
this.analyzer = analyzer;
|
||||
delPolicy = new KeepOnlyLastCommitDeletionPolicy();
|
||||
commit = null;
|
||||
openMode = OpenMode.CREATE_OR_APPEND;
|
||||
similarity = IndexSearcher.getDefaultSimilarity();
|
||||
termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
|
||||
mergeScheduler = new ConcurrentMergeScheduler();
|
||||
writeLockTimeout = WRITE_LOCK_TIMEOUT;
|
||||
maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
|
||||
ramBufferSizeMB = DEFAULT_RAM_BUFFER_SIZE_MB;
|
||||
maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS;
|
||||
indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
|
||||
mergedSegmentWarmer = null;
|
||||
codec = Codec.getDefault();
|
||||
infoStream = InfoStream.getDefault();
|
||||
mergePolicy = new TieredMergePolicy();
|
||||
flushPolicy = new FlushByRamOrCountsPolicy();
|
||||
readerPooling = DEFAULT_READER_POOLING;
|
||||
indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(DEFAULT_MAX_THREAD_STATES);
|
||||
readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;
|
||||
perThreadHardLimitMB = DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB;
|
||||
super(analyzer, matchVersion);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexWriterConfig clone() {
|
||||
IndexWriterConfig clone;
|
||||
if (inUseByIndexWriter.get()) {
|
||||
throw new IllegalStateException("cannot clone: this IndexWriterConfig is private to IndexWriter; make a new one instead");
|
||||
}
|
||||
try {
|
||||
clone = (IndexWriterConfig) super.clone();
|
||||
IndexWriterConfig clone = (IndexWriterConfig) super.clone();
|
||||
|
||||
// Mostly shallow clone, but do a deepish clone of
|
||||
// certain objects that have state that cannot be shared
|
||||
// across IW instances:
|
||||
clone.flushPolicy = flushPolicy.clone();
|
||||
clone.indexerThreadPool = indexerThreadPool.clone();
|
||||
clone.mergePolicy = mergePolicy.clone();
|
||||
|
||||
return clone;
|
||||
} catch (CloneNotSupportedException e) {
|
||||
// should not happen
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
// Mostly shallow clone, but do a deepish clone of
|
||||
// certain objects that have state that cannot be shared
|
||||
// across IW instances:
|
||||
clone.inUseByIndexWriter = new AtomicBoolean();
|
||||
clone.flushPolicy = flushPolicy.clone();
|
||||
clone.indexerThreadPool = indexerThreadPool.clone();
|
||||
clone.mergePolicy = mergePolicy.clone();
|
||||
|
||||
return clone;
|
||||
}
|
||||
|
||||
/** Returns the default analyzer to use for indexing documents. */
|
||||
public Analyzer getAnalyzer() {
|
||||
return analyzer;
|
||||
}
|
||||
|
||||
|
||||
/** Specifies {@link OpenMode} of the index.
|
||||
*
|
||||
* <p>Only takes effect when IndexWriter is first created. */
|
||||
@ -231,7 +170,7 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Returns the {@link OpenMode} set by {@link #setOpenMode(OpenMode)}. */
|
||||
@Override
|
||||
public OpenMode getOpenMode() {
|
||||
return openMode;
|
||||
}
|
||||
@ -258,11 +197,7 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link IndexDeletionPolicy} specified in
|
||||
* {@link #setIndexDeletionPolicy(IndexDeletionPolicy)} or the default
|
||||
* {@link KeepOnlyLastCommitDeletionPolicy}/
|
||||
*/
|
||||
@Override
|
||||
public IndexDeletionPolicy getIndexDeletionPolicy() {
|
||||
return delPolicy;
|
||||
}
|
||||
@ -277,11 +212,7 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link IndexCommit} as specified in
|
||||
* {@link #setIndexCommit(IndexCommit)} or the default, <code>null</code>
|
||||
* which specifies to open the latest index commit point.
|
||||
*/
|
||||
@Override
|
||||
public IndexCommit getIndexCommit() {
|
||||
return commit;
|
||||
}
|
||||
@ -298,52 +229,11 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: returns the {@link Similarity} implementation used by this
|
||||
* IndexWriter.
|
||||
*/
|
||||
@Override
|
||||
public Similarity getSimilarity() {
|
||||
return similarity;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: set the interval between indexed terms. Large values cause less
|
||||
* memory to be used by IndexReader, but slow random-access to terms. Small
|
||||
* values cause more memory to be used by an IndexReader, and speed
|
||||
* random-access to terms.
|
||||
* <p>
|
||||
* This parameter determines the amount of computation required per query
|
||||
* term, regardless of the number of documents that contain that term. In
|
||||
* particular, it is the maximum number of other terms that must be scanned
|
||||
* before a term is located and its frequency and position information may be
|
||||
* processed. In a large index with user-entered query terms, query processing
|
||||
* time is likely to be dominated not by term lookup but rather by the
|
||||
* processing of frequency and positional data. In a small index or when many
|
||||
* uncommon query terms are generated (e.g., by wildcard queries) term lookup
|
||||
* may become a dominant cost.
|
||||
* <p>
|
||||
* In particular, <code>numUniqueTerms/interval</code> terms are read into
|
||||
* memory by an IndexReader, and, on average, <code>interval/2</code> terms
|
||||
* must be scanned for each random term access.
|
||||
*
|
||||
* @see #DEFAULT_TERM_INDEX_INTERVAL
|
||||
*
|
||||
* <p>Takes effect immediately, but only applies to newly
|
||||
* flushed/merged segments. */
|
||||
public IndexWriterConfig setTermIndexInterval(int interval) { // TODO: this should be private to the codec, not settable here
|
||||
this.termIndexInterval = interval;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the interval between indexed terms.
|
||||
*
|
||||
* @see #setTermIndexInterval(int)
|
||||
*/
|
||||
public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here
|
||||
return termIndexInterval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: sets the merge scheduler used by this writer. The default is
|
||||
* {@link ConcurrentMergeScheduler}.
|
||||
@ -357,10 +247,7 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link MergeScheduler} that was set by
|
||||
* {@link #setMergeScheduler(MergeScheduler)}
|
||||
*/
|
||||
@Override
|
||||
public MergeScheduler getMergeScheduler() {
|
||||
return mergeScheduler;
|
||||
}
|
||||
@ -376,173 +263,11 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns allowed timeout when acquiring the write lock.
|
||||
*
|
||||
* @see #setWriteLockTimeout(long)
|
||||
*/
|
||||
@Override
|
||||
public long getWriteLockTimeout() {
|
||||
return writeLockTimeout;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the minimal number of delete terms required before the buffered
|
||||
* in-memory delete terms and queries are applied and flushed.
|
||||
* <p>Disabled by default (writer flushes by RAM usage).</p>
|
||||
* <p>
|
||||
* NOTE: This setting won't trigger a segment flush.
|
||||
* </p>
|
||||
*
|
||||
* @throws IllegalArgumentException if maxBufferedDeleteTerms
|
||||
* is enabled but smaller than 1
|
||||
* @see #setRAMBufferSizeMB
|
||||
* @see #setFlushPolicy(FlushPolicy)
|
||||
*
|
||||
* <p>Takes effect immediately, but only the next time a
|
||||
* document is added, updated or deleted.
|
||||
*/
|
||||
public IndexWriterConfig setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
|
||||
if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH
|
||||
&& maxBufferedDeleteTerms < 1)
|
||||
throw new IllegalArgumentException(
|
||||
"maxBufferedDeleteTerms must at least be 1 when enabled");
|
||||
this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of buffered deleted terms that will trigger a flush of all
|
||||
* buffered deletes if enabled.
|
||||
*
|
||||
* @see #setMaxBufferedDeleteTerms(int)
|
||||
*/
|
||||
public int getMaxBufferedDeleteTerms() {
|
||||
return maxBufferedDeleteTerms;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the amount of RAM that may be used for buffering added documents
|
||||
* and deletions before they are flushed to the Directory. Generally for
|
||||
* faster indexing performance it's best to flush by RAM usage instead of
|
||||
* document count and use as large a RAM buffer as you can.
|
||||
* <p>
|
||||
* When this is set, the writer will flush whenever buffered documents and
|
||||
* deletions use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
|
||||
* triggering a flush due to RAM usage. Note that if flushing by document
|
||||
* count is also enabled, then the flush will be triggered by whichever comes
|
||||
* first.
|
||||
* <p>
|
||||
* The maximum RAM limit is inherently determined by the JVMs available memory.
|
||||
* Yet, an {@link IndexWriter} session can consume a significantly larger amount
|
||||
* of memory than the given RAM limit since this limit is just an indicator when
|
||||
* to flush memory resident documents to the Directory. Flushes are likely happen
|
||||
* concurrently while other threads adding documents to the writer. For application
|
||||
* stability the available memory in the JVM should be significantly larger than
|
||||
* the RAM buffer used for indexing.
|
||||
* <p>
|
||||
* <b>NOTE</b>: the account of RAM usage for pending deletions is only
|
||||
* approximate. Specifically, if you delete by Query, Lucene currently has no
|
||||
* way to measure the RAM usage of individual Queries so the accounting will
|
||||
* under-estimate and you should compensate by either calling commit()
|
||||
* periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)}
|
||||
* to flush and apply buffered deletes by count instead of RAM usage
|
||||
* (for each buffered delete Query a constant number of bytes is used to estimate
|
||||
* RAM usage). Note that enabling {@link #setMaxBufferedDeleteTerms(int)} will
|
||||
* not trigger any segment flushes.
|
||||
* <p>
|
||||
* <b>NOTE</b>: It's not guaranteed that all memory resident documents are flushed
|
||||
* once this limit is exceeded. Depending on the configured {@link FlushPolicy} only a
|
||||
* subset of the buffered documents are flushed and therefore only parts of the RAM
|
||||
* buffer is released.
|
||||
* <p>
|
||||
*
|
||||
* The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.
|
||||
* @see #setFlushPolicy(FlushPolicy)
|
||||
* @see #setRAMPerThreadHardLimitMB(int)
|
||||
*
|
||||
* <p>Takes effect immediately, but only the next time a
|
||||
* document is added, updated or deleted.
|
||||
*
|
||||
* @throws IllegalArgumentException
|
||||
* if ramBufferSize is enabled but non-positive, or it disables
|
||||
* ramBufferSize when maxBufferedDocs is already disabled
|
||||
*
|
||||
*/
|
||||
public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) {
|
||||
if (ramBufferSizeMB != DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0)
|
||||
throw new IllegalArgumentException(
|
||||
"ramBufferSize should be > 0.0 MB when enabled");
|
||||
if (ramBufferSizeMB == DISABLE_AUTO_FLUSH && maxBufferedDocs == DISABLE_AUTO_FLUSH)
|
||||
throw new IllegalArgumentException(
|
||||
"at least one of ramBufferSize and maxBufferedDocs must be enabled");
|
||||
this.ramBufferSizeMB = ramBufferSizeMB;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Returns the value set by {@link #setRAMBufferSizeMB(double)} if enabled. */
|
||||
public double getRAMBufferSizeMB() {
|
||||
return ramBufferSizeMB;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the minimal number of documents required before the buffered
|
||||
* in-memory documents are flushed as a new Segment. Large values generally
|
||||
* give faster indexing.
|
||||
*
|
||||
* <p>
|
||||
* When this is set, the writer will flush every maxBufferedDocs added
|
||||
* documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a
|
||||
* flush due to number of buffered documents. Note that if flushing by RAM
|
||||
* usage is also enabled, then the flush will be triggered by whichever comes
|
||||
* first.
|
||||
*
|
||||
* <p>
|
||||
* Disabled by default (writer flushes by RAM usage).
|
||||
*
|
||||
* <p>Takes effect immediately, but only the next time a
|
||||
* document is added, updated or deleted.
|
||||
*
|
||||
* @see #setRAMBufferSizeMB(double)
|
||||
* @see #setFlushPolicy(FlushPolicy)
|
||||
* @throws IllegalArgumentException
|
||||
* if maxBufferedDocs is enabled but smaller than 2, or it disables
|
||||
* maxBufferedDocs when ramBufferSize is already disabled
|
||||
*/
|
||||
public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) {
|
||||
if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
|
||||
throw new IllegalArgumentException(
|
||||
"maxBufferedDocs must at least be 2 when enabled");
|
||||
if (maxBufferedDocs == DISABLE_AUTO_FLUSH
|
||||
&& ramBufferSizeMB == DISABLE_AUTO_FLUSH)
|
||||
throw new IllegalArgumentException(
|
||||
"at least one of ramBufferSize and maxBufferedDocs must be enabled");
|
||||
this.maxBufferedDocs = maxBufferedDocs;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of buffered added documents that will trigger a flush if
|
||||
* enabled.
|
||||
*
|
||||
* @see #setMaxBufferedDocs(int)
|
||||
*/
|
||||
public int getMaxBufferedDocs() {
|
||||
return maxBufferedDocs;
|
||||
}
|
||||
|
||||
/** Set the merged segment warmer. See {@link IndexReaderWarmer}.
|
||||
*
|
||||
* <p>Takes effect on the next merge. */
|
||||
public IndexWriterConfig setMergedSegmentWarmer(IndexReaderWarmer mergeSegmentWarmer) {
|
||||
this.mergedSegmentWarmer = mergeSegmentWarmer;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Returns the current merged segment warmer. See {@link IndexReaderWarmer}. */
|
||||
public IndexReaderWarmer getMergedSegmentWarmer() {
|
||||
return mergedSegmentWarmer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: {@link MergePolicy} is invoked whenever there are changes to the
|
||||
* segments in the index. Its role is to select which merges to do, if any,
|
||||
@ -556,25 +281,24 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Set the Codec. See {@link Codec}.
|
||||
*
|
||||
* <p>Only takes effect when IndexWriter is first created. */
|
||||
/**
|
||||
* Set the {@link Codec}.
|
||||
*
|
||||
* <p>
|
||||
* Only takes effect when IndexWriter is first created.
|
||||
*/
|
||||
public IndexWriterConfig setCodec(Codec codec) {
|
||||
this.codec = codec;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Returns the current Codec. See {@link Codec}. */
|
||||
@Override
|
||||
public Codec getCodec() {
|
||||
return codec;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the current MergePolicy in use by this writer.
|
||||
*
|
||||
* @see #setMergePolicy(MergePolicy)
|
||||
*/
|
||||
@Override
|
||||
public MergePolicy getMergePolicy() {
|
||||
return mergePolicy;
|
||||
}
|
||||
@ -595,17 +319,15 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
* NOTE: This only takes effect when IndexWriter is first created.</p>*/
|
||||
IndexWriterConfig setIndexerThreadPool(DocumentsWriterPerThreadPool threadPool) {
|
||||
if (threadPool == null) {
|
||||
throw new IllegalArgumentException("DocumentsWriterPerThreadPool must not be nul");
|
||||
throw new IllegalArgumentException("threadPool must not be null");
|
||||
}
|
||||
this.indexerThreadPool = threadPool;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Returns the configured {@link DocumentsWriterPerThreadPool} instance.
|
||||
* @see #setIndexerThreadPool(DocumentsWriterPerThreadPool)
|
||||
* @return the configured {@link DocumentsWriterPerThreadPool} instance.*/
|
||||
@Override
|
||||
DocumentsWriterPerThreadPool getIndexerThreadPool() {
|
||||
return this.indexerThreadPool;
|
||||
return indexerThreadPool;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -620,8 +342,7 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Returns the max number of simultaneous threads that
|
||||
* may be indexing documents at once in IndexWriter. */
|
||||
@Override
|
||||
public int getMaxThreadStates() {
|
||||
try {
|
||||
return ((ThreadAffinityDocumentsWriterThreadPool) indexerThreadPool).getMaxThreadStates();
|
||||
@ -645,8 +366,7 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Returns true if IndexWriter should pool readers even
|
||||
* if {@link DirectoryReader#open(IndexWriter, boolean)} has not been called. */
|
||||
@Override
|
||||
public boolean getReaderPooling() {
|
||||
return readerPooling;
|
||||
}
|
||||
@ -659,34 +379,11 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Returns the indexing chain set on {@link #setIndexingChain(IndexingChain)}. */
|
||||
@Override
|
||||
IndexingChain getIndexingChain() {
|
||||
return indexingChain;
|
||||
}
|
||||
|
||||
/** Sets the termsIndexDivisor passed to any readers that
|
||||
* IndexWriter opens, for example when applying deletes
|
||||
* or creating a near-real-time reader in {@link
|
||||
* DirectoryReader#open(IndexWriter, boolean)}. If you pass -1, the terms index
|
||||
* won't be loaded by the readers. This is only useful in
|
||||
* advanced situations when you will only .next() through
|
||||
* all terms; attempts to seek will hit an exception.
|
||||
*
|
||||
* <p>Takes effect immediately, but only applies to
|
||||
* readers opened after this call */
|
||||
public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
|
||||
if (divisor <= 0 && divisor != -1) {
|
||||
throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")");
|
||||
}
|
||||
readerTermsIndexDivisor = divisor;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** @see #setReaderTermsIndexDivisor(int) */
|
||||
public int getReaderTermsIndexDivisor() {
|
||||
return readerTermsIndexDivisor;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Controls when segments are flushed to disk during indexing.
|
||||
* The {@link FlushPolicy} is initialized during {@link IndexWriter} instantiation and once initialized
|
||||
@ -719,28 +416,56 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the max amount of memory each {@link DocumentsWriterPerThread} can
|
||||
* consume until forcefully flushed.
|
||||
* @see #setRAMPerThreadHardLimitMB(int)
|
||||
*/
|
||||
@Override
|
||||
public int getRAMPerThreadHardLimitMB() {
|
||||
return perThreadHardLimitMB;
|
||||
}
|
||||
/**
|
||||
* @see #setFlushPolicy(FlushPolicy)
|
||||
*/
|
||||
|
||||
@Override
|
||||
public FlushPolicy getFlushPolicy() {
|
||||
return flushPolicy;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #setInfoStream(InfoStream)
|
||||
*/
|
||||
@Override
|
||||
public InfoStream getInfoStream() {
|
||||
return infoStream;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Analyzer getAnalyzer() {
|
||||
return super.getAnalyzer();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMaxBufferedDeleteTerms() {
|
||||
return super.getMaxBufferedDeleteTerms();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getMaxBufferedDocs() {
|
||||
return super.getMaxBufferedDocs();
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexReaderWarmer getMergedSegmentWarmer() {
|
||||
return super.getMergedSegmentWarmer();
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getRAMBufferSizeMB() {
|
||||
return super.getRAMBufferSizeMB();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getReaderTermsIndexDivisor() {
|
||||
return super.getReaderTermsIndexDivisor();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getTermIndexInterval() {
|
||||
return super.getTermIndexInterval();
|
||||
}
|
||||
|
||||
/** If non-null, information about merges, deletes and a
|
||||
* message when maxFieldLength is reached will be printed
|
||||
* to this.
|
||||
@ -754,40 +479,39 @@ public final class IndexWriterConfig implements Cloneable {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method that uses {@link PrintStreamInfoStream}
|
||||
*/
|
||||
/** Convenience method that uses {@link PrintStreamInfoStream} */
|
||||
public IndexWriterConfig setInfoStream(PrintStream printStream) {
|
||||
return setInfoStream(printStream == null ? InfoStream.NO_OUTPUT : new PrintStreamInfoStream(printStream));
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("matchVersion=").append(matchVersion).append("\n");
|
||||
sb.append("analyzer=").append(analyzer == null ? "null" : analyzer.getClass().getName()).append("\n");
|
||||
sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n");
|
||||
sb.append("commit=").append(commit == null ? "null" : commit).append("\n");
|
||||
sb.append("openMode=").append(openMode).append("\n");
|
||||
sb.append("similarity=").append(similarity.getClass().getName()).append("\n");
|
||||
sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here
|
||||
sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n");
|
||||
sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n");
|
||||
sb.append("writeLockTimeout=").append(writeLockTimeout).append("\n");
|
||||
sb.append("maxBufferedDeleteTerms=").append(maxBufferedDeleteTerms).append("\n");
|
||||
sb.append("ramBufferSizeMB=").append(ramBufferSizeMB).append("\n");
|
||||
sb.append("maxBufferedDocs=").append(maxBufferedDocs).append("\n");
|
||||
sb.append("mergedSegmentWarmer=").append(mergedSegmentWarmer).append("\n");
|
||||
sb.append("codec=").append(codec).append("\n");
|
||||
sb.append("infoStream=").append(infoStream.getClass().getName()).append("\n");
|
||||
sb.append("mergePolicy=").append(mergePolicy).append("\n");
|
||||
sb.append("indexerThreadPool=").append(indexerThreadPool).append("\n");
|
||||
sb.append("readerPooling=").append(readerPooling).append("\n");
|
||||
sb.append("readerTermsIndexDivisor=").append(readerTermsIndexDivisor).append("\n");
|
||||
sb.append("flushPolicy=").append(flushPolicy).append("\n");
|
||||
sb.append("perThreadHardLimitMB=").append(perThreadHardLimitMB).append("\n");
|
||||
|
||||
return sb.toString();
|
||||
public IndexWriterConfig setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
|
||||
return (IndexWriterConfig) super.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) {
|
||||
return (IndexWriterConfig) super.setMaxBufferedDocs(maxBufferedDocs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexWriterConfig setMergedSegmentWarmer(IndexReaderWarmer mergeSegmentWarmer) {
|
||||
return (IndexWriterConfig) super.setMergedSegmentWarmer(mergeSegmentWarmer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) {
|
||||
return (IndexWriterConfig) super.setRAMBufferSizeMB(ramBufferSizeMB);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
|
||||
return (IndexWriterConfig) super.setReaderTermsIndexDivisor(divisor);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexWriterConfig setTermIndexInterval(int interval) {
|
||||
return (IndexWriterConfig) super.setTermIndexInterval(interval);
|
||||
}
|
||||
|
||||
}
|
||||
|
505
lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
Executable file
505
lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
Executable file
@ -0,0 +1,505 @@
|
||||
package org.apache.lucene.index;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
|
||||
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.InfoStream;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* Holds all the configuration used by {@link IndexWriter} with a few setters for
|
||||
* settings that can be changed on an {@link IndexWriter} instance "live".
|
||||
*
|
||||
* @since 4.0
|
||||
*/
|
||||
public class LiveIndexWriterConfig {
|
||||
|
||||
private final Analyzer analyzer;
|
||||
|
||||
private volatile int maxBufferedDocs;
|
||||
private volatile double ramBufferSizeMB;
|
||||
private volatile int maxBufferedDeleteTerms;
|
||||
private volatile int readerTermsIndexDivisor;
|
||||
private volatile IndexReaderWarmer mergedSegmentWarmer;
|
||||
private volatile int termIndexInterval; // TODO: this should be private to the codec, not settable here
|
||||
|
||||
// modified by IndexWriterConfig
|
||||
protected volatile IndexDeletionPolicy delPolicy;
|
||||
protected volatile IndexCommit commit;
|
||||
protected volatile OpenMode openMode;
|
||||
protected volatile Similarity similarity;
|
||||
protected volatile MergeScheduler mergeScheduler;
|
||||
protected volatile long writeLockTimeout;
|
||||
protected volatile IndexingChain indexingChain;
|
||||
protected volatile Codec codec;
|
||||
protected volatile InfoStream infoStream;
|
||||
protected volatile MergePolicy mergePolicy;
|
||||
protected volatile DocumentsWriterPerThreadPool indexerThreadPool;
|
||||
protected volatile boolean readerPooling;
|
||||
protected volatile FlushPolicy flushPolicy;
|
||||
protected volatile int perThreadHardLimitMB;
|
||||
|
||||
protected final Version matchVersion;
|
||||
|
||||
// used by IndexWriterConfig
|
||||
LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) {
|
||||
this.analyzer = analyzer;
|
||||
this.matchVersion = matchVersion;
|
||||
ramBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
|
||||
maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
|
||||
maxBufferedDeleteTerms = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS;
|
||||
readerTermsIndexDivisor = IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR;
|
||||
mergedSegmentWarmer = null;
|
||||
termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
|
||||
delPolicy = new KeepOnlyLastCommitDeletionPolicy();
|
||||
commit = null;
|
||||
openMode = OpenMode.CREATE_OR_APPEND;
|
||||
similarity = IndexSearcher.getDefaultSimilarity();
|
||||
mergeScheduler = new ConcurrentMergeScheduler();
|
||||
writeLockTimeout = IndexWriterConfig.WRITE_LOCK_TIMEOUT;
|
||||
indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
|
||||
codec = Codec.getDefault();
|
||||
infoStream = InfoStream.getDefault();
|
||||
mergePolicy = new TieredMergePolicy();
|
||||
flushPolicy = new FlushByRamOrCountsPolicy();
|
||||
readerPooling = IndexWriterConfig.DEFAULT_READER_POOLING;
|
||||
indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES);
|
||||
perThreadHardLimitMB = IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new config that handles the live {@link IndexWriter}
|
||||
* settings.
|
||||
*/
|
||||
LiveIndexWriterConfig(IndexWriterConfig config) {
|
||||
maxBufferedDeleteTerms = config.getMaxBufferedDeleteTerms();
|
||||
maxBufferedDocs = config.getMaxBufferedDocs();
|
||||
mergedSegmentWarmer = config.getMergedSegmentWarmer();
|
||||
ramBufferSizeMB = config.getRAMBufferSizeMB();
|
||||
readerTermsIndexDivisor = config.getReaderTermsIndexDivisor();
|
||||
termIndexInterval = config.getTermIndexInterval();
|
||||
matchVersion = config.matchVersion;
|
||||
analyzer = config.getAnalyzer();
|
||||
delPolicy = config.getIndexDeletionPolicy();
|
||||
commit = config.getIndexCommit();
|
||||
openMode = config.getOpenMode();
|
||||
similarity = config.getSimilarity();
|
||||
mergeScheduler = config.getMergeScheduler();
|
||||
writeLockTimeout = config.getWriteLockTimeout();
|
||||
indexingChain = config.getIndexingChain();
|
||||
codec = config.getCodec();
|
||||
infoStream = config.getInfoStream();
|
||||
mergePolicy = config.getMergePolicy();
|
||||
indexerThreadPool = config.getIndexerThreadPool();
|
||||
readerPooling = config.getReaderPooling();
|
||||
flushPolicy = config.getFlushPolicy();
|
||||
perThreadHardLimitMB = config.getRAMPerThreadHardLimitMB();
|
||||
}
|
||||
|
||||
/** Returns the default analyzer to use for indexing documents. */
|
||||
public Analyzer getAnalyzer() {
|
||||
return analyzer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: set the interval between indexed terms. Large values cause less
|
||||
* memory to be used by IndexReader, but slow random-access to terms. Small
|
||||
* values cause more memory to be used by an IndexReader, and speed
|
||||
* random-access to terms.
|
||||
* <p>
|
||||
* This parameter determines the amount of computation required per query
|
||||
* term, regardless of the number of documents that contain that term. In
|
||||
* particular, it is the maximum number of other terms that must be scanned
|
||||
* before a term is located and its frequency and position information may be
|
||||
* processed. In a large index with user-entered query terms, query processing
|
||||
* time is likely to be dominated not by term lookup but rather by the
|
||||
* processing of frequency and positional data. In a small index or when many
|
||||
* uncommon query terms are generated (e.g., by wildcard queries) term lookup
|
||||
* may become a dominant cost.
|
||||
* <p>
|
||||
* In particular, <code>numUniqueTerms/interval</code> terms are read into
|
||||
* memory by an IndexReader, and, on average, <code>interval/2</code> terms
|
||||
* must be scanned for each random term access.
|
||||
*
|
||||
* <p>
|
||||
* Takes effect immediately, but only applies to newly flushed/merged
|
||||
* segments.
|
||||
*
|
||||
* @see IndexWriterConfig#DEFAULT_TERM_INDEX_INTERVAL
|
||||
*/
|
||||
public LiveIndexWriterConfig setTermIndexInterval(int interval) { // TODO: this should be private to the codec, not settable here
|
||||
this.termIndexInterval = interval;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the interval between indexed terms.
|
||||
*
|
||||
* @see #setTermIndexInterval(int)
|
||||
*/
|
||||
public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here
|
||||
return termIndexInterval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the minimal number of delete terms required before the buffered
|
||||
* in-memory delete terms and queries are applied and flushed.
|
||||
* <p>
|
||||
* Disabled by default (writer flushes by RAM usage).
|
||||
* <p>
|
||||
* NOTE: This setting won't trigger a segment flush.
|
||||
*
|
||||
* <p>
|
||||
* Takes effect immediately, but only the next time a document is added,
|
||||
* updated or deleted.
|
||||
*
|
||||
* @throws IllegalArgumentException
|
||||
* if maxBufferedDeleteTerms is enabled but smaller than 1
|
||||
*
|
||||
* @see #setRAMBufferSizeMB
|
||||
*/
|
||||
public LiveIndexWriterConfig setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
|
||||
if (maxBufferedDeleteTerms != IndexWriterConfig.DISABLE_AUTO_FLUSH && maxBufferedDeleteTerms < 1) {
|
||||
throw new IllegalArgumentException("maxBufferedDeleteTerms must at least be 1 when enabled");
|
||||
}
|
||||
this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of buffered deleted terms that will trigger a flush of all
|
||||
* buffered deletes if enabled.
|
||||
*
|
||||
* @see #setMaxBufferedDeleteTerms(int)
|
||||
*/
|
||||
public int getMaxBufferedDeleteTerms() {
|
||||
return maxBufferedDeleteTerms;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the amount of RAM that may be used for buffering added documents
|
||||
* and deletions before they are flushed to the Directory. Generally for
|
||||
* faster indexing performance it's best to flush by RAM usage instead of
|
||||
* document count and use as large a RAM buffer as you can.
|
||||
* <p>
|
||||
* When this is set, the writer will flush whenever buffered documents and
|
||||
* deletions use this much RAM. Pass in
|
||||
* {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} to prevent triggering a flush
|
||||
* due to RAM usage. Note that if flushing by document count is also enabled,
|
||||
* then the flush will be triggered by whichever comes first.
|
||||
* <p>
|
||||
* The maximum RAM limit is inherently determined by the JVM's available
|
||||
* memory. Yet, an {@link IndexWriter} session can consume a significantly
|
||||
* larger amount of memory than the given RAM limit since this limit is just
|
||||
* an indicator when to flush memory resident documents to the Directory.
|
||||
* Flushes are likely to happen concurrently while other threads are adding documents
|
||||
* to the writer. For application stability the available memory in the JVM
|
||||
* should be significantly larger than the RAM buffer used for indexing.
|
||||
* <p>
|
||||
* <b>NOTE</b>: the account of RAM usage for pending deletions is only
|
||||
* approximate. Specifically, if you delete by Query, Lucene currently has no
|
||||
* way to measure the RAM usage of individual Queries so the accounting will
|
||||
* under-estimate and you should compensate by either calling commit()
|
||||
* periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)}
|
||||
* to flush and apply buffered deletes by count instead of RAM usage (for each
|
||||
* buffered delete Query a constant number of bytes is used to estimate RAM
|
||||
* usage). Note that enabling {@link #setMaxBufferedDeleteTerms(int)} will not
|
||||
* trigger any segment flushes.
|
||||
* <p>
|
||||
* <b>NOTE</b>: It's not guaranteed that all memory resident documents are
|
||||
* flushed once this limit is exceeded. Depending on the configured
|
||||
* {@link FlushPolicy} only a subset of the buffered documents are flushed and
|
||||
* therefore only part of the RAM buffer is released.
|
||||
* <p>
|
||||
*
|
||||
* The default value is {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB}.
|
||||
*
|
||||
* <p>
|
||||
* Takes effect immediately, but only the next time a document is added,
|
||||
* updated or deleted.
|
||||
*
|
||||
* @see IndexWriterConfig#setRAMPerThreadHardLimitMB(int)
|
||||
*
|
||||
* @throws IllegalArgumentException
|
||||
* if ramBufferSize is enabled but non-positive, or it disables
|
||||
* ramBufferSize when maxBufferedDocs is already disabled
|
||||
*/
|
||||
public LiveIndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) {
|
||||
if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0) {
|
||||
throw new IllegalArgumentException("ramBufferSize should be > 0.0 MB when enabled");
|
||||
}
|
||||
if (ramBufferSizeMB == IndexWriterConfig.DISABLE_AUTO_FLUSH
|
||||
&& maxBufferedDocs == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
|
||||
throw new IllegalArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
|
||||
}
|
||||
this.ramBufferSizeMB = ramBufferSizeMB;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Returns the value set by {@link #setRAMBufferSizeMB(double)} if enabled. */
|
||||
public double getRAMBufferSizeMB() {
|
||||
return ramBufferSizeMB;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the minimal number of documents required before the buffered
|
||||
* in-memory documents are flushed as a new Segment. Large values generally
|
||||
* give faster indexing.
|
||||
*
|
||||
* <p>
|
||||
* When this is set, the writer will flush every maxBufferedDocs added
|
||||
* documents. Pass in {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} to prevent
|
||||
* triggering a flush due to number of buffered documents. Note that if
|
||||
* flushing by RAM usage is also enabled, then the flush will be triggered by
|
||||
* whichever comes first.
|
||||
*
|
||||
* <p>
|
||||
* Disabled by default (writer flushes by RAM usage).
|
||||
*
|
||||
* <p>
|
||||
* Takes effect immediately, but only the next time a document is added,
|
||||
* updated or deleted.
|
||||
*
|
||||
* @see #setRAMBufferSizeMB(double)
|
||||
* @throws IllegalArgumentException
|
||||
* if maxBufferedDocs is enabled but smaller than 2, or it disables
|
||||
* maxBufferedDocs when ramBufferSize is already disabled
|
||||
*/
|
||||
public LiveIndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) {
|
||||
if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH && maxBufferedDocs < 2) {
|
||||
throw new IllegalArgumentException("maxBufferedDocs must at least be 2 when enabled");
|
||||
}
|
||||
if (maxBufferedDocs == IndexWriterConfig.DISABLE_AUTO_FLUSH
|
||||
&& ramBufferSizeMB == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
|
||||
throw new IllegalArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
|
||||
}
|
||||
this.maxBufferedDocs = maxBufferedDocs;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of buffered added documents that will trigger a flush if
|
||||
* enabled.
|
||||
*
|
||||
* @see #setMaxBufferedDocs(int)
|
||||
*/
|
||||
public int getMaxBufferedDocs() {
|
||||
return maxBufferedDocs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the merged segment warmer. See {@link IndexReaderWarmer}.
|
||||
*
|
||||
* <p>
|
||||
* Takes effect on the next merge.
|
||||
*/
|
||||
public LiveIndexWriterConfig setMergedSegmentWarmer(IndexReaderWarmer mergeSegmentWarmer) {
|
||||
this.mergedSegmentWarmer = mergeSegmentWarmer;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Returns the current merged segment warmer. See {@link IndexReaderWarmer}. */
|
||||
public IndexReaderWarmer getMergedSegmentWarmer() {
|
||||
return mergedSegmentWarmer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the termsIndexDivisor passed to any readers that IndexWriter opens,
|
||||
* for example when applying deletes or creating a near-real-time reader in
|
||||
* {@link DirectoryReader#open(IndexWriter, boolean)}. If you pass -1, the
|
||||
* terms index won't be loaded by the readers. This is only useful in advanced
|
||||
* situations when you will only .next() through all terms; attempts to seek
|
||||
* will hit an exception.
|
||||
*
|
||||
* <p>
|
||||
* Takes effect immediately, but only applies to readers opened after this
|
||||
* call
|
||||
*/
|
||||
public LiveIndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
|
||||
if (divisor <= 0 && divisor != -1) {
|
||||
throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")");
|
||||
}
|
||||
readerTermsIndexDivisor = divisor;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** @see #setReaderTermsIndexDivisor(int) */
|
||||
public int getReaderTermsIndexDivisor() {
|
||||
return readerTermsIndexDivisor;
|
||||
}
|
||||
|
||||
/** Returns the {@link OpenMode} set by {@link IndexWriterConfig#setOpenMode(OpenMode)}. */
|
||||
public OpenMode getOpenMode() {
|
||||
return openMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link IndexDeletionPolicy} specified in
|
||||
* {@link IndexWriterConfig#setIndexDeletionPolicy(IndexDeletionPolicy)} or
|
||||
* the default {@link KeepOnlyLastCommitDeletionPolicy}.
|
||||
*/
|
||||
public IndexDeletionPolicy getIndexDeletionPolicy() {
|
||||
return delPolicy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link IndexCommit} as specified in
|
||||
* {@link IndexWriterConfig#setIndexCommit(IndexCommit)} or the default,
|
||||
* {@code null} which specifies to open the latest index commit point.
|
||||
*/
|
||||
public IndexCommit getIndexCommit() {
|
||||
return commit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: returns the {@link Similarity} implementation used by this
|
||||
* {@link IndexWriter}.
|
||||
*/
|
||||
public Similarity getSimilarity() {
|
||||
return similarity;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link MergeScheduler} that was set by
|
||||
* {@link IndexWriterConfig#setMergeScheduler(MergeScheduler)}.
|
||||
*/
|
||||
public MergeScheduler getMergeScheduler() {
|
||||
return mergeScheduler;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns allowed timeout when acquiring the write lock.
|
||||
*
|
||||
* @see IndexWriterConfig#setWriteLockTimeout(long)
|
||||
*/
|
||||
public long getWriteLockTimeout() {
|
||||
return writeLockTimeout;
|
||||
}
|
||||
|
||||
/** Returns the current {@link Codec}. */
|
||||
public Codec getCodec() {
|
||||
return codec;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current MergePolicy in use by this writer.
|
||||
*
|
||||
* @see IndexWriterConfig#setMergePolicy(MergePolicy)
|
||||
*/
|
||||
public MergePolicy getMergePolicy() {
|
||||
return mergePolicy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the configured {@link DocumentsWriterPerThreadPool} instance.
|
||||
*
|
||||
* @see IndexWriterConfig#setIndexerThreadPool(DocumentsWriterPerThreadPool)
|
||||
* @return the configured {@link DocumentsWriterPerThreadPool} instance.
|
||||
*/
|
||||
DocumentsWriterPerThreadPool getIndexerThreadPool() {
|
||||
return indexerThreadPool;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the max number of simultaneous threads that may be indexing
|
||||
* documents at once in IndexWriter.
|
||||
*/
|
||||
public int getMaxThreadStates() {
|
||||
try {
|
||||
return ((ThreadAffinityDocumentsWriterThreadPool) indexerThreadPool).getMaxThreadStates();
|
||||
} catch (ClassCastException cce) {
|
||||
throw new IllegalStateException(cce);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns {@code true} if {@link IndexWriter} should pool readers even if
|
||||
* {@link DirectoryReader#open(IndexWriter, boolean)} has not been called.
|
||||
*/
|
||||
public boolean getReaderPooling() {
|
||||
return readerPooling;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the indexing chain set on
|
||||
* {@link IndexWriterConfig#setIndexingChain(IndexingChain)}.
|
||||
*/
|
||||
IndexingChain getIndexingChain() {
|
||||
return indexingChain;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the max amount of memory each {@link DocumentsWriterPerThread} can
|
||||
* consume until forcefully flushed.
|
||||
*
|
||||
* @see IndexWriterConfig#setRAMPerThreadHardLimitMB(int)
|
||||
*/
|
||||
public int getRAMPerThreadHardLimitMB() {
|
||||
return perThreadHardLimitMB;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see IndexWriterConfig#setFlushPolicy(FlushPolicy)
|
||||
*/
|
||||
public FlushPolicy getFlushPolicy() {
|
||||
return flushPolicy;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see IndexWriterConfig#setInfoStream(InfoStream)
|
||||
*/
|
||||
public InfoStream getInfoStream() {
|
||||
return infoStream;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("matchVersion=").append(matchVersion).append("\n");
|
||||
sb.append("analyzer=").append(analyzer == null ? "null" : analyzer.getClass().getName()).append("\n");
|
||||
sb.append("ramBufferSizeMB=").append(getRAMBufferSizeMB()).append("\n");
|
||||
sb.append("maxBufferedDocs=").append(getMaxBufferedDocs()).append("\n");
|
||||
sb.append("maxBufferedDeleteTerms=").append(getMaxBufferedDeleteTerms()).append("\n");
|
||||
sb.append("mergedSegmentWarmer=").append(getMergeScheduler()).append("\n");
|
||||
sb.append("readerTermsIndexDivisor=").append(getReaderTermsIndexDivisor()).append("\n");
|
||||
sb.append("termIndexInterval=").append(getTermIndexInterval()).append("\n"); // TODO: this should be private to the codec, not settable here
|
||||
sb.append("delPolicy=").append(getIndexDeletionPolicy().getClass().getName()).append("\n");
|
||||
IndexCommit commit = getIndexCommit();
|
||||
sb.append("commit=").append(commit == null ? "null" : commit).append("\n");
|
||||
sb.append("openMode=").append(getOpenMode()).append("\n");
|
||||
sb.append("similarity=").append(getSimilarity().getClass().getName()).append("\n");
|
||||
sb.append("mergeScheduler=").append(getMergeScheduler().getClass().getName()).append("\n");
|
||||
sb.append("default WRITE_LOCK_TIMEOUT=").append(IndexWriterConfig.WRITE_LOCK_TIMEOUT).append("\n");
|
||||
sb.append("writeLockTimeout=").append(getWriteLockTimeout()).append("\n");
|
||||
sb.append("codec=").append(getCodec()).append("\n");
|
||||
sb.append("infoStream=").append(getInfoStream().getClass().getName()).append("\n");
|
||||
sb.append("mergePolicy=").append(getMergePolicy()).append("\n");
|
||||
sb.append("indexerThreadPool=").append(getIndexerThreadPool()).append("\n");
|
||||
sb.append("readerPooling=").append(getReaderPooling()).append("\n");
|
||||
sb.append("flushPolicy=").append(getFlushPolicy()).append("\n");
|
||||
sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
}
|
@ -294,7 +294,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
|
||||
|
||||
public class IndexThread extends Thread {
|
||||
IndexWriter writer;
|
||||
IndexWriterConfig iwc;
|
||||
LiveIndexWriterConfig iwc;
|
||||
LineFileDocs docs;
|
||||
private AtomicInteger pendingDocs;
|
||||
private final boolean doRandomCommit;
|
||||
|
@ -25,8 +25,6 @@ import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
@ -114,18 +112,70 @@ public class TestIndexWriterConfig extends LuceneTestCase {
|
||||
|
||||
@Test
|
||||
public void testSettersChaining() throws Exception {
|
||||
// Ensures that every setter returns IndexWriterConfig to enable easy
|
||||
// chaining.
|
||||
// Ensures that every setter returns IndexWriterConfig to allow chaining.
|
||||
HashSet<String> liveSetters = new HashSet<String>();
|
||||
HashSet<String> allSetters = new HashSet<String>();
|
||||
for (Method m : IndexWriterConfig.class.getDeclaredMethods()) {
|
||||
if (m.getDeclaringClass() == IndexWriterConfig.class
|
||||
&& m.getName().startsWith("set")
|
||||
&& !Modifier.isStatic(m.getModifiers())) {
|
||||
assertEquals("method " + m.getName() + " does not return IndexWriterConfig",
|
||||
IndexWriterConfig.class, m.getReturnType());
|
||||
if (m.getName().startsWith("set") && !Modifier.isStatic(m.getModifiers())) {
|
||||
allSetters.add(m.getName());
|
||||
// setters overridden from LiveIndexWriterConfig are returned twice, once with
|
||||
// IndexWriterConfig return type and second with LiveIndexWriterConfig. The ones
|
||||
// from LiveIndexWriterConfig are marked 'synthetic', so just collect them and
|
||||
// assert in the end that we also received them from IWC.
|
||||
if (m.isSynthetic()) {
|
||||
liveSetters.add(m.getName());
|
||||
} else {
|
||||
assertEquals("method " + m.getName() + " does not return IndexWriterConfig",
|
||||
IndexWriterConfig.class, m.getReturnType());
|
||||
}
|
||||
}
|
||||
}
|
||||
for (String setter : liveSetters) {
|
||||
assertTrue("setter method not overridden by IndexWriterConfig: " + setter, allSetters.contains(setter));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReuse() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
// test that if the same IWC is reused across two IWs, it is cloned by each.
|
||||
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, conf);
|
||||
LiveIndexWriterConfig liveConf1 = iw.w.getConfig();
|
||||
iw.close();
|
||||
|
||||
iw = new RandomIndexWriter(random(), dir, conf);
|
||||
LiveIndexWriterConfig liveConf2 = iw.w.getConfig();
|
||||
iw.close();
|
||||
|
||||
// LiveIndexWriterConfig's "copy" constructor doesn't clone objects.
|
||||
assertNotSame("IndexWriterConfig should have been cloned", liveConf1.getMergePolicy(), liveConf2.getMergePolicy());
|
||||
|
||||
dir.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOverrideGetters() throws Exception {
|
||||
// Test that IndexWriterConfig overrides all getters, so that javadocs
|
||||
// contain all methods for the users. Also, ensures that IndexWriterConfig
|
||||
// doesn't declare getters that are not declared on LiveIWC.
|
||||
HashSet<String> liveGetters = new HashSet<String>();
|
||||
for (Method m : LiveIndexWriterConfig.class.getDeclaredMethods()) {
|
||||
if (m.getName().startsWith("get") && !Modifier.isStatic(m.getModifiers())) {
|
||||
liveGetters.add(m.getName());
|
||||
}
|
||||
}
|
||||
|
||||
for (Method m : IndexWriterConfig.class.getDeclaredMethods()) {
|
||||
if (m.getName().startsWith("get") && !Modifier.isStatic(m.getModifiers())) {
|
||||
assertEquals("method " + m.getName() + " not overrided by IndexWriterConfig",
|
||||
IndexWriterConfig.class, m.getDeclaringClass());
|
||||
assertTrue("method " + m.getName() + " not declared on LiveIndexWriterConfig",
|
||||
liveGetters.contains(m.getName()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConstants() throws Exception {
|
||||
// Tests that the values of the constants do not change
|
||||
@ -276,53 +326,4 @@ public class TestIndexWriterConfig extends LuceneTestCase {
|
||||
assertEquals(LogByteSizeMergePolicy.class, conf.getMergePolicy().getClass());
|
||||
}
|
||||
|
||||
public void testReuse() throws Exception {
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
Directory dir = newDirectory();
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("foo", "bar", Store.YES));
|
||||
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);
|
||||
riw.addDocument(doc);
|
||||
riw.close();
|
||||
|
||||
// Sharing IWC should be fine:
|
||||
riw = new RandomIndexWriter(random(), dir, iwc);
|
||||
riw.addDocument(doc);
|
||||
riw.close();
|
||||
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testIWCClone() throws Exception {
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
// Cannot clone IW's private IWC clone:
|
||||
try {
|
||||
riw.w.getConfig().clone();
|
||||
fail("did not hit expected exception");
|
||||
} catch (IllegalStateException ise) {
|
||||
// expected
|
||||
}
|
||||
riw.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testIWCInvalidReuse() throws Exception {
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);
|
||||
IndexWriterConfig privateIWC = riw.w.getConfig();
|
||||
riw.close();
|
||||
|
||||
// Cannot clone IW's private IWC clone:
|
||||
try {
|
||||
new RandomIndexWriter(random(), dir, privateIWC);
|
||||
fail("did not hit expected exception");
|
||||
} catch (IllegalStateException ise) {
|
||||
// expected
|
||||
}
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user