LUCENE-4132: introduce LiveIndexWriterConfig, returned from IndexWriter.getConfig()

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1351225 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2012-06-18 08:09:31 +00:00
parent da70cee7ff
commit a11013a990
10 changed files with 690 additions and 466 deletions

View File

@ -10,6 +10,12 @@ New features
* LUCENE-4108: add replaceTaxonomy to DirectoryTaxonomyWriter, which replaces * LUCENE-4108: add replaceTaxonomy to DirectoryTaxonomyWriter, which replaces
the taxonomy in place with the given one. (Shai Erera) the taxonomy in place with the given one. (Shai Erera)
API Changes
* LUCENE-4132: IndexWriter.getConfig() now returns a LiveIndexWriterConfig object
which can be used to change the IndexWriter's live settings. IndexWriterConfig
is used only for initializing the IndexWriter. (Shai Erera)
======================= Lucene 4.0.0-ALPHA ======================= ======================= Lucene 4.0.0-ALPHA =======================

View File

@ -133,7 +133,7 @@ final class DocumentsWriter {
final DocumentsWriterFlushControl flushControl; final DocumentsWriterFlushControl flushControl;
final Codec codec; final Codec codec;
DocumentsWriter(Codec codec, IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumbers globalFieldNumbers, DocumentsWriter(Codec codec, LiveIndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumbers globalFieldNumbers,
BufferedDeletesStream bufferedDeletesStream) throws IOException { BufferedDeletesStream bufferedDeletesStream) throws IOException {
this.codec = codec; this.codec = codec;
this.directory = directory; this.directory = directory;

View File

@ -65,10 +65,9 @@ final class DocumentsWriterFlushControl implements MemoryController {
private final FlushPolicy flushPolicy; private final FlushPolicy flushPolicy;
private boolean closed = false; private boolean closed = false;
private final DocumentsWriter documentsWriter; private final DocumentsWriter documentsWriter;
private final IndexWriterConfig config; private final LiveIndexWriterConfig config;
DocumentsWriterFlushControl(DocumentsWriter documentsWriter, DocumentsWriterFlushControl(DocumentsWriter documentsWriter, LiveIndexWriterConfig config) {
IndexWriterConfig config) {
this.stallControl = new DocumentsWriterStallControl(); this.stallControl = new DocumentsWriterStallControl();
this.perThreadPool = documentsWriter.perThreadPool; this.perThreadPool = documentsWriter.perThreadPool;
this.flushPolicy = documentsWriter.flushPolicy; this.flushPolicy = documentsWriter.flushPolicy;

View File

@ -135,7 +135,7 @@ abstract class DocumentsWriterPerThreadPool implements Cloneable {
numThreadStatesActive = 0; numThreadStatesActive = 0;
} }
void initialize(DocumentsWriter documentsWriter, FieldNumbers globalFieldMap, IndexWriterConfig config) { void initialize(DocumentsWriter documentsWriter, FieldNumbers globalFieldMap, LiveIndexWriterConfig config) {
this.documentsWriter.set(documentsWriter); // thread pool is bound to DW this.documentsWriter.set(documentsWriter); // thread pool is bound to DW
this.globalFieldMap.set(globalFieldMap); this.globalFieldMap.set(globalFieldMap);
for (int i = 0; i < threadStates.length; i++) { for (int i = 0; i < threadStates.length; i++) {

View File

@ -52,7 +52,7 @@ import org.apache.lucene.util.SetOnce;
*/ */
abstract class FlushPolicy implements Cloneable { abstract class FlushPolicy implements Cloneable {
protected SetOnce<DocumentsWriter> writer = new SetOnce<DocumentsWriter>(); protected SetOnce<DocumentsWriter> writer = new SetOnce<DocumentsWriter>();
protected IndexWriterConfig indexWriterConfig; protected LiveIndexWriterConfig indexWriterConfig;
/** /**
* Called for each delete term. If this is a delete triggered due to an update * Called for each delete term. If this is a delete triggered due to an update

View File

@ -258,7 +258,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
// The instance that was passed to the constructor. It is saved only in order // The instance that was passed to the constructor. It is saved only in order
// to allow users to query an IndexWriter settings. // to allow users to query an IndexWriter settings.
private final IndexWriterConfig config; private final LiveIndexWriterConfig config;
// The PayloadProcessorProvider to use when segments are merged // The PayloadProcessorProvider to use when segments are merged
private PayloadProcessorProvider payloadProcessorProvider; private PayloadProcessorProvider payloadProcessorProvider;
@ -586,11 +586,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
*/ */
public IndexWriter(Directory d, IndexWriterConfig conf) public IndexWriter(Directory d, IndexWriterConfig conf)
throws CorruptIndexException, LockObtainFailedException, IOException { throws CorruptIndexException, LockObtainFailedException, IOException {
if (conf.inUseByIndexWriter.get()) { config = new LiveIndexWriterConfig(conf.clone());
throw new IllegalStateException("the provided IndexWriterConfig was previously used by a different IndexWriter; please make a new one instead");
}
config = conf.clone();
config.inUseByIndexWriter.set(true);
directory = d; directory = d;
analyzer = config.getAnalyzer(); analyzer = config.getAnalyzer();
infoStream = config.getInfoStream(); infoStream = config.getInfoStream();
@ -757,17 +753,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
} }
/** /**
* Returns the private {@link IndexWriterConfig}, cloned * Returns a {@link LiveIndexWriterConfig}, which can be used to query the IndexWriter
* from the {@link IndexWriterConfig} passed to * current settings, as well as modify "live" ones.
* {@link #IndexWriter(Directory, IndexWriterConfig)}.
* <p>
* <b>NOTE:</b> some settings may be changed on the
* returned {@link IndexWriterConfig}, and will take
* effect in the current IndexWriter instance. See the
* javadocs for the specific setters in {@link
* IndexWriterConfig} for details.
*/ */
public IndexWriterConfig getConfig() { public LiveIndexWriterConfig getConfig() {
ensureOpen(false); ensureOpen(false);
return config; return config;
} }

View File

@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/ */
import java.io.PrintStream; import java.io.PrintStream;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.Codec;
@ -28,17 +27,14 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.PrintStreamInfoStream; import org.apache.lucene.util.PrintStreamInfoStream;
import org.apache.lucene.util.SetOnce;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
/** /**
* Holds all the configuration of {@link IndexWriter}. You * Holds all the configuration that is used to create an {@link IndexWriter}.
* should instantiate this class, call the setters to set * Once {@link IndexWriter} has been created with this object, changes to this
* your configuration, then pass it to {@link IndexWriter}. * object will not affect the {@link IndexWriter} instance. For that, use
* Note that {@link IndexWriter} makes a private clone; if * {@link LiveIndexWriterConfig} that is returned from {@link IndexWriter#getConfig()}.
* you need to subsequently change settings use {@link *
* IndexWriter#getConfig}.
*
* <p> * <p>
* All setter methods return {@link IndexWriterConfig} to allow chaining * All setter methods return {@link IndexWriterConfig} to allow chaining
* settings conveniently, for example: * settings conveniently, for example:
@ -47,10 +43,12 @@ import org.apache.lucene.util.Version;
* IndexWriterConfig conf = new IndexWriterConfig(analyzer); * IndexWriterConfig conf = new IndexWriterConfig(analyzer);
* conf.setter1().setter2(); * conf.setter1().setter2();
* </pre> * </pre>
* *
* @see IndexWriter#getConfig()
*
* @since 3.1 * @since 3.1
*/ */
public final class IndexWriterConfig implements Cloneable { public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cloneable {
/** /**
* Specifies the open mode for {@link IndexWriter}. * Specifies the open mode for {@link IndexWriter}.
@ -131,33 +129,6 @@ public final class IndexWriterConfig implements Cloneable {
return WRITE_LOCK_TIMEOUT; return WRITE_LOCK_TIMEOUT;
} }
private final Analyzer analyzer;
private volatile IndexDeletionPolicy delPolicy;
private volatile IndexCommit commit;
private volatile OpenMode openMode;
private volatile Similarity similarity;
private volatile int termIndexInterval; // TODO: this should be private to the codec, not settable here
private volatile MergeScheduler mergeScheduler;
private volatile long writeLockTimeout;
private volatile int maxBufferedDeleteTerms;
private volatile double ramBufferSizeMB;
private volatile int maxBufferedDocs;
private volatile IndexingChain indexingChain;
private volatile IndexReaderWarmer mergedSegmentWarmer;
private volatile Codec codec;
private volatile InfoStream infoStream;
private volatile MergePolicy mergePolicy;
private volatile DocumentsWriterPerThreadPool indexerThreadPool;
private volatile boolean readerPooling;
private volatile int readerTermsIndexDivisor;
private volatile FlushPolicy flushPolicy;
private volatile int perThreadHardLimitMB;
private Version matchVersion;
// Used directly by IndexWriter:
AtomicBoolean inUseByIndexWriter = new AtomicBoolean();
/** /**
* Creates a new config that with defaults that match the specified * Creates a new config that with defaults that match the specified
* {@link Version} as well as the default {@link * {@link Version} as well as the default {@link
@ -170,59 +141,27 @@ public final class IndexWriterConfig implements Cloneable {
* {@link LogDocMergePolicy}. * {@link LogDocMergePolicy}.
*/ */
public IndexWriterConfig(Version matchVersion, Analyzer analyzer) { public IndexWriterConfig(Version matchVersion, Analyzer analyzer) {
this.matchVersion = matchVersion; super(analyzer, matchVersion);
this.analyzer = analyzer;
delPolicy = new KeepOnlyLastCommitDeletionPolicy();
commit = null;
openMode = OpenMode.CREATE_OR_APPEND;
similarity = IndexSearcher.getDefaultSimilarity();
termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
mergeScheduler = new ConcurrentMergeScheduler();
writeLockTimeout = WRITE_LOCK_TIMEOUT;
maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
ramBufferSizeMB = DEFAULT_RAM_BUFFER_SIZE_MB;
maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS;
indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
mergedSegmentWarmer = null;
codec = Codec.getDefault();
infoStream = InfoStream.getDefault();
mergePolicy = new TieredMergePolicy();
flushPolicy = new FlushByRamOrCountsPolicy();
readerPooling = DEFAULT_READER_POOLING;
indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(DEFAULT_MAX_THREAD_STATES);
readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;
perThreadHardLimitMB = DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB;
} }
@Override @Override
public IndexWriterConfig clone() { public IndexWriterConfig clone() {
IndexWriterConfig clone;
if (inUseByIndexWriter.get()) {
throw new IllegalStateException("cannot clone: this IndexWriterConfig is private to IndexWriter; make a new one instead");
}
try { try {
clone = (IndexWriterConfig) super.clone(); IndexWriterConfig clone = (IndexWriterConfig) super.clone();
// Mostly shallow clone, but do a deepish clone of
// certain objects that have state that cannot be shared
// across IW instances:
clone.flushPolicy = flushPolicy.clone();
clone.indexerThreadPool = indexerThreadPool.clone();
clone.mergePolicy = mergePolicy.clone();
return clone;
} catch (CloneNotSupportedException e) { } catch (CloneNotSupportedException e) {
// should not happen
throw new RuntimeException(e); throw new RuntimeException(e);
} }
// Mostly shallow clone, but do a deepish clone of
// certain objects that have state that cannot be shared
// across IW instances:
clone.inUseByIndexWriter = new AtomicBoolean();
clone.flushPolicy = flushPolicy.clone();
clone.indexerThreadPool = indexerThreadPool.clone();
clone.mergePolicy = mergePolicy.clone();
return clone;
} }
/** Returns the default analyzer to use for indexing documents. */
public Analyzer getAnalyzer() {
return analyzer;
}
/** Specifies {@link OpenMode} of the index. /** Specifies {@link OpenMode} of the index.
* *
* <p>Only takes effect when IndexWriter is first created. */ * <p>Only takes effect when IndexWriter is first created. */
@ -231,7 +170,7 @@ public final class IndexWriterConfig implements Cloneable {
return this; return this;
} }
/** Returns the {@link OpenMode} set by {@link #setOpenMode(OpenMode)}. */ @Override
public OpenMode getOpenMode() { public OpenMode getOpenMode() {
return openMode; return openMode;
} }
@ -258,11 +197,7 @@ public final class IndexWriterConfig implements Cloneable {
return this; return this;
} }
/** @Override
* Returns the {@link IndexDeletionPolicy} specified in
* {@link #setIndexDeletionPolicy(IndexDeletionPolicy)} or the default
* {@link KeepOnlyLastCommitDeletionPolicy}/
*/
public IndexDeletionPolicy getIndexDeletionPolicy() { public IndexDeletionPolicy getIndexDeletionPolicy() {
return delPolicy; return delPolicy;
} }
@ -277,11 +212,7 @@ public final class IndexWriterConfig implements Cloneable {
return this; return this;
} }
/** @Override
* Returns the {@link IndexCommit} as specified in
* {@link #setIndexCommit(IndexCommit)} or the default, <code>null</code>
* which specifies to open the latest index commit point.
*/
public IndexCommit getIndexCommit() { public IndexCommit getIndexCommit() {
return commit; return commit;
} }
@ -298,52 +229,11 @@ public final class IndexWriterConfig implements Cloneable {
return this; return this;
} }
/** @Override
* Expert: returns the {@link Similarity} implementation used by this
* IndexWriter.
*/
public Similarity getSimilarity() { public Similarity getSimilarity() {
return similarity; return similarity;
} }
/**
* Expert: set the interval between indexed terms. Large values cause less
* memory to be used by IndexReader, but slow random-access to terms. Small
* values cause more memory to be used by an IndexReader, and speed
* random-access to terms.
* <p>
* This parameter determines the amount of computation required per query
* term, regardless of the number of documents that contain that term. In
* particular, it is the maximum number of other terms that must be scanned
* before a term is located and its frequency and position information may be
* processed. In a large index with user-entered query terms, query processing
* time is likely to be dominated not by term lookup but rather by the
* processing of frequency and positional data. In a small index or when many
* uncommon query terms are generated (e.g., by wildcard queries) term lookup
* may become a dominant cost.
* <p>
* In particular, <code>numUniqueTerms/interval</code> terms are read into
* memory by an IndexReader, and, on average, <code>interval/2</code> terms
* must be scanned for each random term access.
*
* @see #DEFAULT_TERM_INDEX_INTERVAL
*
* <p>Takes effect immediately, but only applies to newly
* flushed/merged segments. */
public IndexWriterConfig setTermIndexInterval(int interval) { // TODO: this should be private to the codec, not settable here
this.termIndexInterval = interval;
return this;
}
/**
* Returns the interval between indexed terms.
*
* @see #setTermIndexInterval(int)
*/
public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here
return termIndexInterval;
}
/** /**
* Expert: sets the merge scheduler used by this writer. The default is * Expert: sets the merge scheduler used by this writer. The default is
* {@link ConcurrentMergeScheduler}. * {@link ConcurrentMergeScheduler}.
@ -357,10 +247,7 @@ public final class IndexWriterConfig implements Cloneable {
return this; return this;
} }
/** @Override
* Returns the {@link MergeScheduler} that was set by
* {@link #setMergeScheduler(MergeScheduler)}
*/
public MergeScheduler getMergeScheduler() { public MergeScheduler getMergeScheduler() {
return mergeScheduler; return mergeScheduler;
} }
@ -376,173 +263,11 @@ public final class IndexWriterConfig implements Cloneable {
return this; return this;
} }
/** @Override
* Returns allowed timeout when acquiring the write lock.
*
* @see #setWriteLockTimeout(long)
*/
public long getWriteLockTimeout() { public long getWriteLockTimeout() {
return writeLockTimeout; return writeLockTimeout;
} }
/**
* Determines the minimal number of delete terms required before the buffered
* in-memory delete terms and queries are applied and flushed.
* <p>Disabled by default (writer flushes by RAM usage).</p>
* <p>
* NOTE: This setting won't trigger a segment flush.
* </p>
*
* @throws IllegalArgumentException if maxBufferedDeleteTerms
* is enabled but smaller than 1
* @see #setRAMBufferSizeMB
* @see #setFlushPolicy(FlushPolicy)
*
* <p>Takes effect immediately, but only the next time a
* document is added, updated or deleted.
*/
public IndexWriterConfig setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH
&& maxBufferedDeleteTerms < 1)
throw new IllegalArgumentException(
"maxBufferedDeleteTerms must at least be 1 when enabled");
this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
return this;
}
/**
* Returns the number of buffered deleted terms that will trigger a flush of all
* buffered deletes if enabled.
*
* @see #setMaxBufferedDeleteTerms(int)
*/
public int getMaxBufferedDeleteTerms() {
return maxBufferedDeleteTerms;
}
/**
* Determines the amount of RAM that may be used for buffering added documents
* and deletions before they are flushed to the Directory. Generally for
* faster indexing performance it's best to flush by RAM usage instead of
* document count and use as large a RAM buffer as you can.
* <p>
* When this is set, the writer will flush whenever buffered documents and
* deletions use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
* triggering a flush due to RAM usage. Note that if flushing by document
* count is also enabled, then the flush will be triggered by whichever comes
* first.
* <p>
* The maximum RAM limit is inherently determined by the JVMs available memory.
* Yet, an {@link IndexWriter} session can consume a significantly larger amount
* of memory than the given RAM limit since this limit is just an indicator when
* to flush memory resident documents to the Directory. Flushes are likely happen
* concurrently while other threads adding documents to the writer. For application
* stability the available memory in the JVM should be significantly larger than
* the RAM buffer used for indexing.
* <p>
* <b>NOTE</b>: the account of RAM usage for pending deletions is only
* approximate. Specifically, if you delete by Query, Lucene currently has no
* way to measure the RAM usage of individual Queries so the accounting will
* under-estimate and you should compensate by either calling commit()
* periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)}
* to flush and apply buffered deletes by count instead of RAM usage
* (for each buffered delete Query a constant number of bytes is used to estimate
* RAM usage). Note that enabling {@link #setMaxBufferedDeleteTerms(int)} will
* not trigger any segment flushes.
* <p>
* <b>NOTE</b>: It's not guaranteed that all memory resident documents are flushed
* once this limit is exceeded. Depending on the configured {@link FlushPolicy} only a
* subset of the buffered documents are flushed and therefore only parts of the RAM
* buffer is released.
* <p>
*
* The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.
* @see #setFlushPolicy(FlushPolicy)
* @see #setRAMPerThreadHardLimitMB(int)
*
* <p>Takes effect immediately, but only the next time a
* document is added, updated or deleted.
*
* @throws IllegalArgumentException
* if ramBufferSize is enabled but non-positive, or it disables
* ramBufferSize when maxBufferedDocs is already disabled
*
*/
public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) {
if (ramBufferSizeMB != DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0)
throw new IllegalArgumentException(
"ramBufferSize should be > 0.0 MB when enabled");
if (ramBufferSizeMB == DISABLE_AUTO_FLUSH && maxBufferedDocs == DISABLE_AUTO_FLUSH)
throw new IllegalArgumentException(
"at least one of ramBufferSize and maxBufferedDocs must be enabled");
this.ramBufferSizeMB = ramBufferSizeMB;
return this;
}
/** Returns the value set by {@link #setRAMBufferSizeMB(double)} if enabled. */
public double getRAMBufferSizeMB() {
return ramBufferSizeMB;
}
/**
* Determines the minimal number of documents required before the buffered
* in-memory documents are flushed as a new Segment. Large values generally
* give faster indexing.
*
* <p>
* When this is set, the writer will flush every maxBufferedDocs added
* documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a
* flush due to number of buffered documents. Note that if flushing by RAM
* usage is also enabled, then the flush will be triggered by whichever comes
* first.
*
* <p>
* Disabled by default (writer flushes by RAM usage).
*
* <p>Takes effect immediately, but only the next time a
* document is added, updated or deleted.
*
* @see #setRAMBufferSizeMB(double)
* @see #setFlushPolicy(FlushPolicy)
* @throws IllegalArgumentException
* if maxBufferedDocs is enabled but smaller than 2, or it disables
* maxBufferedDocs when ramBufferSize is already disabled
*/
public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) {
if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
throw new IllegalArgumentException(
"maxBufferedDocs must at least be 2 when enabled");
if (maxBufferedDocs == DISABLE_AUTO_FLUSH
&& ramBufferSizeMB == DISABLE_AUTO_FLUSH)
throw new IllegalArgumentException(
"at least one of ramBufferSize and maxBufferedDocs must be enabled");
this.maxBufferedDocs = maxBufferedDocs;
return this;
}
/**
* Returns the number of buffered added documents that will trigger a flush if
* enabled.
*
* @see #setMaxBufferedDocs(int)
*/
public int getMaxBufferedDocs() {
return maxBufferedDocs;
}
/** Set the merged segment warmer. See {@link IndexReaderWarmer}.
*
* <p>Takes effect on the next merge. */
public IndexWriterConfig setMergedSegmentWarmer(IndexReaderWarmer mergeSegmentWarmer) {
this.mergedSegmentWarmer = mergeSegmentWarmer;
return this;
}
/** Returns the current merged segment warmer. See {@link IndexReaderWarmer}. */
public IndexReaderWarmer getMergedSegmentWarmer() {
return mergedSegmentWarmer;
}
/** /**
* Expert: {@link MergePolicy} is invoked whenever there are changes to the * Expert: {@link MergePolicy} is invoked whenever there are changes to the
* segments in the index. Its role is to select which merges to do, if any, * segments in the index. Its role is to select which merges to do, if any,
@ -556,25 +281,24 @@ public final class IndexWriterConfig implements Cloneable {
return this; return this;
} }
/** Set the Codec. See {@link Codec}. /**
* * Set the {@link Codec}.
* <p>Only takes effect when IndexWriter is first created. */ *
* <p>
* Only takes effect when IndexWriter is first created.
*/
public IndexWriterConfig setCodec(Codec codec) { public IndexWriterConfig setCodec(Codec codec) {
this.codec = codec; this.codec = codec;
return this; return this;
} }
/** Returns the current Codec. See {@link Codec}. */ @Override
public Codec getCodec() { public Codec getCodec() {
return codec; return codec;
} }
/** @Override
* Returns the current MergePolicy in use by this writer.
*
* @see #setMergePolicy(MergePolicy)
*/
public MergePolicy getMergePolicy() { public MergePolicy getMergePolicy() {
return mergePolicy; return mergePolicy;
} }
@ -595,17 +319,15 @@ public final class IndexWriterConfig implements Cloneable {
* NOTE: This only takes effect when IndexWriter is first created.</p>*/ * NOTE: This only takes effect when IndexWriter is first created.</p>*/
IndexWriterConfig setIndexerThreadPool(DocumentsWriterPerThreadPool threadPool) { IndexWriterConfig setIndexerThreadPool(DocumentsWriterPerThreadPool threadPool) {
if (threadPool == null) { if (threadPool == null) {
throw new IllegalArgumentException("DocumentsWriterPerThreadPool must not be nul"); throw new IllegalArgumentException("threadPool must not be null");
} }
this.indexerThreadPool = threadPool; this.indexerThreadPool = threadPool;
return this; return this;
} }
/** Returns the configured {@link DocumentsWriterPerThreadPool} instance. @Override
* @see #setIndexerThreadPool(DocumentsWriterPerThreadPool)
* @return the configured {@link DocumentsWriterPerThreadPool} instance.*/
DocumentsWriterPerThreadPool getIndexerThreadPool() { DocumentsWriterPerThreadPool getIndexerThreadPool() {
return this.indexerThreadPool; return indexerThreadPool;
} }
/** /**
@ -620,8 +342,7 @@ public final class IndexWriterConfig implements Cloneable {
return this; return this;
} }
/** Returns the max number of simultaneous threads that @Override
* may be indexing documents at once in IndexWriter. */
public int getMaxThreadStates() { public int getMaxThreadStates() {
try { try {
return ((ThreadAffinityDocumentsWriterThreadPool) indexerThreadPool).getMaxThreadStates(); return ((ThreadAffinityDocumentsWriterThreadPool) indexerThreadPool).getMaxThreadStates();
@ -645,8 +366,7 @@ public final class IndexWriterConfig implements Cloneable {
return this; return this;
} }
/** Returns true if IndexWriter should pool readers even @Override
* if {@link DirectoryReader#open(IndexWriter, boolean)} has not been called. */
public boolean getReaderPooling() { public boolean getReaderPooling() {
return readerPooling; return readerPooling;
} }
@ -659,34 +379,11 @@ public final class IndexWriterConfig implements Cloneable {
return this; return this;
} }
/** Returns the indexing chain set on {@link #setIndexingChain(IndexingChain)}. */ @Override
IndexingChain getIndexingChain() { IndexingChain getIndexingChain() {
return indexingChain; return indexingChain;
} }
/** Sets the termsIndexDivisor passed to any readers that
* IndexWriter opens, for example when applying deletes
* or creating a near-real-time reader in {@link
* DirectoryReader#open(IndexWriter, boolean)}. If you pass -1, the terms index
* won't be loaded by the readers. This is only useful in
* advanced situations when you will only .next() through
* all terms; attempts to seek will hit an exception.
*
* <p>Takes effect immediately, but only applies to
* readers opened after this call */
public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
if (divisor <= 0 && divisor != -1) {
throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")");
}
readerTermsIndexDivisor = divisor;
return this;
}
/** @see #setReaderTermsIndexDivisor(int) */
public int getReaderTermsIndexDivisor() {
return readerTermsIndexDivisor;
}
/** /**
* Expert: Controls when segments are flushed to disk during indexing. * Expert: Controls when segments are flushed to disk during indexing.
* The {@link FlushPolicy} initialized during {@link IndexWriter} instantiation and once initialized * The {@link FlushPolicy} initialized during {@link IndexWriter} instantiation and once initialized
@ -719,28 +416,56 @@ public final class IndexWriterConfig implements Cloneable {
return this; return this;
} }
/** @Override
* Returns the max amount of memory each {@link DocumentsWriterPerThread} can
* consume until forcefully flushed.
* @see #setRAMPerThreadHardLimitMB(int)
*/
public int getRAMPerThreadHardLimitMB() { public int getRAMPerThreadHardLimitMB() {
return perThreadHardLimitMB; return perThreadHardLimitMB;
} }
/**
* @see #setFlushPolicy(FlushPolicy) @Override
*/
public FlushPolicy getFlushPolicy() { public FlushPolicy getFlushPolicy() {
return flushPolicy; return flushPolicy;
} }
/** @Override
* @see #setInfoStream(InfoStream)
*/
public InfoStream getInfoStream() { public InfoStream getInfoStream() {
return infoStream; return infoStream;
} }
@Override
public Analyzer getAnalyzer() {
return super.getAnalyzer();
}
@Override
public int getMaxBufferedDeleteTerms() {
return super.getMaxBufferedDeleteTerms();
}
@Override
public int getMaxBufferedDocs() {
return super.getMaxBufferedDocs();
}
@Override
public IndexReaderWarmer getMergedSegmentWarmer() {
return super.getMergedSegmentWarmer();
}
@Override
public double getRAMBufferSizeMB() {
return super.getRAMBufferSizeMB();
}
@Override
public int getReaderTermsIndexDivisor() {
return super.getReaderTermsIndexDivisor();
}
@Override
public int getTermIndexInterval() {
return super.getTermIndexInterval();
}
/** If non-null, information about merges, deletes and a /** If non-null, information about merges, deletes and a
* message when maxFieldLength is reached will be printed * message when maxFieldLength is reached will be printed
* to this. * to this.
@ -754,40 +479,39 @@ public final class IndexWriterConfig implements Cloneable {
return this; return this;
} }
/** /** Convenience method that uses {@link PrintStreamInfoStream} */
* Convenience method that uses {@link PrintStreamInfoStream}
*/
public IndexWriterConfig setInfoStream(PrintStream printStream) { public IndexWriterConfig setInfoStream(PrintStream printStream) {
return setInfoStream(printStream == null ? InfoStream.NO_OUTPUT : new PrintStreamInfoStream(printStream)); return setInfoStream(printStream == null ? InfoStream.NO_OUTPUT : new PrintStreamInfoStream(printStream));
} }
@Override @Override
public String toString() { public IndexWriterConfig setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
StringBuilder sb = new StringBuilder(); return (IndexWriterConfig) super.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
sb.append("matchVersion=").append(matchVersion).append("\n"); }
sb.append("analyzer=").append(analyzer == null ? "null" : analyzer.getClass().getName()).append("\n");
sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n"); @Override
sb.append("commit=").append(commit == null ? "null" : commit).append("\n"); public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) {
sb.append("openMode=").append(openMode).append("\n"); return (IndexWriterConfig) super.setMaxBufferedDocs(maxBufferedDocs);
sb.append("similarity=").append(similarity.getClass().getName()).append("\n"); }
sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here
sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n"); @Override
sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n"); public IndexWriterConfig setMergedSegmentWarmer(IndexReaderWarmer mergeSegmentWarmer) {
sb.append("writeLockTimeout=").append(writeLockTimeout).append("\n"); return (IndexWriterConfig) super.setMergedSegmentWarmer(mergeSegmentWarmer);
sb.append("maxBufferedDeleteTerms=").append(maxBufferedDeleteTerms).append("\n"); }
sb.append("ramBufferSizeMB=").append(ramBufferSizeMB).append("\n");
sb.append("maxBufferedDocs=").append(maxBufferedDocs).append("\n"); @Override
sb.append("mergedSegmentWarmer=").append(mergedSegmentWarmer).append("\n"); public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) {
sb.append("codec=").append(codec).append("\n"); return (IndexWriterConfig) super.setRAMBufferSizeMB(ramBufferSizeMB);
sb.append("infoStream=").append(infoStream.getClass().getName()).append("\n"); }
sb.append("mergePolicy=").append(mergePolicy).append("\n");
sb.append("indexerThreadPool=").append(indexerThreadPool).append("\n"); @Override
sb.append("readerPooling=").append(readerPooling).append("\n"); public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
sb.append("readerTermsIndexDivisor=").append(readerTermsIndexDivisor).append("\n"); return (IndexWriterConfig) super.setReaderTermsIndexDivisor(divisor);
sb.append("flushPolicy=").append(flushPolicy).append("\n"); }
sb.append("perThreadHardLimitMB=").append(perThreadHardLimitMB).append("\n");
@Override
return sb.toString(); public IndexWriterConfig setTermIndexInterval(int interval) {
return (IndexWriterConfig) super.setTermIndexInterval(interval);
} }
} }

View File

@ -0,0 +1,505 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.Version;
/**
 * Holds all the configuration used by {@link IndexWriter} with few setters for
 * settings that can be changed on an {@link IndexWriter} instance "live".
 * <p>
 * All "live" settings are stored in {@code volatile} fields, so a change made
 * through a setter is immediately visible to indexing threads without further
 * synchronization.
 * 
 * @since 4.0
 */
public class LiveIndexWriterConfig {
  
  private final Analyzer analyzer;
  
  // Settings that may be changed "live" after the IndexWriter is created:
  private volatile int maxBufferedDocs;
  private volatile double ramBufferSizeMB;
  private volatile int maxBufferedDeleteTerms;
  private volatile int readerTermsIndexDivisor;
  private volatile IndexReaderWarmer mergedSegmentWarmer;
  private volatile int termIndexInterval; // TODO: this should be private to the codec, not settable here

  // modified by IndexWriterConfig
  protected volatile IndexDeletionPolicy delPolicy;
  protected volatile IndexCommit commit;
  protected volatile OpenMode openMode;
  protected volatile Similarity similarity;
  protected volatile MergeScheduler mergeScheduler;
  protected volatile long writeLockTimeout;
  protected volatile IndexingChain indexingChain;
  protected volatile Codec codec;
  protected volatile InfoStream infoStream;
  protected volatile MergePolicy mergePolicy;
  protected volatile DocumentsWriterPerThreadPool indexerThreadPool;
  protected volatile boolean readerPooling;
  protected volatile FlushPolicy flushPolicy;
  protected volatile int perThreadHardLimitMB;

  protected final Version matchVersion;

  // used by IndexWriterConfig: initializes every setting to its documented default
  LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) {
    this.analyzer = analyzer;
    this.matchVersion = matchVersion;
    ramBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
    maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
    maxBufferedDeleteTerms = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS;
    readerTermsIndexDivisor = IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR;
    mergedSegmentWarmer = null;
    termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
    delPolicy = new KeepOnlyLastCommitDeletionPolicy();
    commit = null;
    openMode = OpenMode.CREATE_OR_APPEND;
    similarity = IndexSearcher.getDefaultSimilarity();
    mergeScheduler = new ConcurrentMergeScheduler();
    writeLockTimeout = IndexWriterConfig.WRITE_LOCK_TIMEOUT;
    indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
    codec = Codec.getDefault();
    infoStream = InfoStream.getDefault();
    mergePolicy = new TieredMergePolicy();
    flushPolicy = new FlushByRamOrCountsPolicy();
    readerPooling = IndexWriterConfig.DEFAULT_READER_POOLING;
    indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES);
    perThreadHardLimitMB = IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB;
  }
  
  /**
   * Creates a new config that handles the live {@link IndexWriter} settings,
   * copying every value from the given {@link IndexWriterConfig}. Note that
   * the referenced objects (merge policy, scheduler, etc.) are shared, not
   * cloned.
   */
  LiveIndexWriterConfig(IndexWriterConfig config) {
    maxBufferedDeleteTerms = config.getMaxBufferedDeleteTerms();
    maxBufferedDocs = config.getMaxBufferedDocs();
    mergedSegmentWarmer = config.getMergedSegmentWarmer();
    ramBufferSizeMB = config.getRAMBufferSizeMB();
    readerTermsIndexDivisor = config.getReaderTermsIndexDivisor();
    termIndexInterval = config.getTermIndexInterval();
    matchVersion = config.matchVersion;
    analyzer = config.getAnalyzer();
    delPolicy = config.getIndexDeletionPolicy();
    commit = config.getIndexCommit();
    openMode = config.getOpenMode();
    similarity = config.getSimilarity();
    mergeScheduler = config.getMergeScheduler();
    writeLockTimeout = config.getWriteLockTimeout();
    indexingChain = config.getIndexingChain();
    codec = config.getCodec();
    infoStream = config.getInfoStream();
    mergePolicy = config.getMergePolicy();
    indexerThreadPool = config.getIndexerThreadPool();
    readerPooling = config.getReaderPooling();
    flushPolicy = config.getFlushPolicy();
    perThreadHardLimitMB = config.getRAMPerThreadHardLimitMB();
  }

  /** Returns the default analyzer to use for indexing documents. */
  public Analyzer getAnalyzer() {
    return analyzer;
  }
  
  /**
   * Expert: set the interval between indexed terms. Large values cause less
   * memory to be used by IndexReader, but slow random-access to terms. Small
   * values cause more memory to be used by an IndexReader, and speed
   * random-access to terms.
   * <p>
   * This parameter determines the amount of computation required per query
   * term, regardless of the number of documents that contain that term. In
   * particular, it is the maximum number of other terms that must be scanned
   * before a term is located and its frequency and position information may be
   * processed. In a large index with user-entered query terms, query processing
   * time is likely to be dominated not by term lookup but rather by the
   * processing of frequency and positional data. In a small index or when many
   * uncommon query terms are generated (e.g., by wildcard queries) term lookup
   * may become a dominant cost.
   * <p>
   * In particular, <code>numUniqueTerms/interval</code> terms are read into
   * memory by an IndexReader, and, on average, <code>interval/2</code> terms
   * must be scanned for each random term access.
   * 
   * <p>
   * Takes effect immediately, but only applies to newly flushed/merged
   * segments.
   * 
   * @see IndexWriterConfig#DEFAULT_TERM_INDEX_INTERVAL
   */
  public LiveIndexWriterConfig setTermIndexInterval(int interval) { // TODO: this should be private to the codec, not settable here
    this.termIndexInterval = interval;
    return this;
  }

  /**
   * Returns the interval between indexed terms.
   * 
   * @see #setTermIndexInterval(int)
   */
  public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here
    return termIndexInterval;
  }

  /**
   * Determines the minimal number of delete terms required before the buffered
   * in-memory delete terms and queries are applied and flushed.
   * <p>
   * Disabled by default (writer flushes by RAM usage).
   * <p>
   * NOTE: This setting won't trigger a segment flush.
   * 
   * <p>
   * Takes effect immediately, but only the next time a document is added,
   * updated or deleted.
   * 
   * @throws IllegalArgumentException
   *           if maxBufferedDeleteTerms is enabled but smaller than 1
   * 
   * @see #setRAMBufferSizeMB
   */
  public LiveIndexWriterConfig setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
    if (maxBufferedDeleteTerms != IndexWriterConfig.DISABLE_AUTO_FLUSH && maxBufferedDeleteTerms < 1) {
      throw new IllegalArgumentException("maxBufferedDeleteTerms must at least be 1 when enabled");
    }
    this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
    return this;
  }

  /**
   * Returns the number of buffered deleted terms that will trigger a flush of all
   * buffered deletes if enabled.
   * 
   * @see #setMaxBufferedDeleteTerms(int)
   */
  public int getMaxBufferedDeleteTerms() {
    return maxBufferedDeleteTerms;
  }
  
  /**
   * Determines the amount of RAM that may be used for buffering added documents
   * and deletions before they are flushed to the Directory. Generally for
   * faster indexing performance it's best to flush by RAM usage instead of
   * document count and use as large a RAM buffer as you can.
   * <p>
   * When this is set, the writer will flush whenever buffered documents and
   * deletions use this much RAM. Pass in
   * {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} to prevent triggering a flush
   * due to RAM usage. Note that if flushing by document count is also enabled,
   * then the flush will be triggered by whichever comes first.
   * <p>
   * The maximum RAM limit is inherently determined by the JVMs available
   * memory. Yet, an {@link IndexWriter} session can consume a significantly
   * larger amount of memory than the given RAM limit since this limit is just
   * an indicator when to flush memory resident documents to the Directory.
   * Flushes are likely to happen concurrently while other threads are adding
   * documents to the writer. For application stability the available memory in
   * the JVM should be significantly larger than the RAM buffer used for
   * indexing.
   * <p>
   * <b>NOTE</b>: the account of RAM usage for pending deletions is only
   * approximate. Specifically, if you delete by Query, Lucene currently has no
   * way to measure the RAM usage of individual Queries so the accounting will
   * under-estimate and you should compensate by either calling commit()
   * periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)}
   * to flush and apply buffered deletes by count instead of RAM usage (for each
   * buffered delete Query a constant number of bytes is used to estimate RAM
   * usage). Note that enabling {@link #setMaxBufferedDeleteTerms(int)} will not
   * trigger any segment flushes.
   * <p>
   * <b>NOTE</b>: It's not guaranteed that all memory resident documents are
   * flushed once this limit is exceeded. Depending on the configured
   * {@link FlushPolicy} only a subset of the buffered documents are flushed and
   * therefore only parts of the RAM buffer are released.
   * <p>
   * 
   * The default value is {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB}.
   * 
   * <p>
   * Takes effect immediately, but only the next time a document is added,
   * updated or deleted.
   * 
   * @see IndexWriterConfig#setRAMPerThreadHardLimitMB(int)
   * 
   * @throws IllegalArgumentException
   *           if ramBufferSize is enabled but non-positive, or it disables
   *           ramBufferSize when maxBufferedDocs is already disabled
   */
  public LiveIndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) {
    if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0) {
      throw new IllegalArgumentException("ramBufferSize should be > 0.0 MB when enabled");
    }
    // at least one flush trigger (RAM or doc count) must remain enabled
    if (ramBufferSizeMB == IndexWriterConfig.DISABLE_AUTO_FLUSH
        && maxBufferedDocs == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
      throw new IllegalArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
    }
    this.ramBufferSizeMB = ramBufferSizeMB;
    return this;
  }

  /** Returns the value set by {@link #setRAMBufferSizeMB(double)} if enabled. */
  public double getRAMBufferSizeMB() {
    return ramBufferSizeMB;
  }
  
  /**
   * Determines the minimal number of documents required before the buffered
   * in-memory documents are flushed as a new Segment. Large values generally
   * give faster indexing.
   * 
   * <p>
   * When this is set, the writer will flush every maxBufferedDocs added
   * documents. Pass in {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} to prevent
   * triggering a flush due to number of buffered documents. Note that if
   * flushing by RAM usage is also enabled, then the flush will be triggered by
   * whichever comes first.
   * 
   * <p>
   * Disabled by default (writer flushes by RAM usage).
   * 
   * <p>
   * Takes effect immediately, but only the next time a document is added,
   * updated or deleted.
   * 
   * @see #setRAMBufferSizeMB(double)
   * @throws IllegalArgumentException
   *           if maxBufferedDocs is enabled but smaller than 2, or it disables
   *           maxBufferedDocs when ramBufferSize is already disabled
   */
  public LiveIndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) {
    if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH && maxBufferedDocs < 2) {
      throw new IllegalArgumentException("maxBufferedDocs must at least be 2 when enabled");
    }
    // at least one flush trigger (RAM or doc count) must remain enabled
    if (maxBufferedDocs == IndexWriterConfig.DISABLE_AUTO_FLUSH
        && ramBufferSizeMB == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
      throw new IllegalArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled");
    }
    this.maxBufferedDocs = maxBufferedDocs;
    return this;
  }

  /**
   * Returns the number of buffered added documents that will trigger a flush if
   * enabled.
   * 
   * @see #setMaxBufferedDocs(int)
   */
  public int getMaxBufferedDocs() {
    return maxBufferedDocs;
  }
  
  /**
   * Set the merged segment warmer. See {@link IndexReaderWarmer}.
   * 
   * <p>
   * Takes effect on the next merge.
   */
  public LiveIndexWriterConfig setMergedSegmentWarmer(IndexReaderWarmer mergeSegmentWarmer) {
    this.mergedSegmentWarmer = mergeSegmentWarmer;
    return this;
  }

  /** Returns the current merged segment warmer. See {@link IndexReaderWarmer}. */
  public IndexReaderWarmer getMergedSegmentWarmer() {
    return mergedSegmentWarmer;
  }
  
  /**
   * Sets the termsIndexDivisor passed to any readers that IndexWriter opens,
   * for example when applying deletes or creating a near-real-time reader in
   * {@link DirectoryReader#open(IndexWriter, boolean)}. If you pass -1, the
   * terms index won't be loaded by the readers. This is only useful in advanced
   * situations when you will only .next() through all terms; attempts to seek
   * will hit an exception.
   * 
   * <p>
   * Takes effect immediately, but only applies to readers opened after this
   * call
   */
  public LiveIndexWriterConfig setReaderTermsIndexDivisor(int divisor) {
    if (divisor <= 0 && divisor != -1) {
      throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")");
    }
    readerTermsIndexDivisor = divisor;
    return this;
  }

  /** @see #setReaderTermsIndexDivisor(int) */
  public int getReaderTermsIndexDivisor() {
    return readerTermsIndexDivisor;
  }
  
  /** Returns the {@link OpenMode} set by {@link IndexWriterConfig#setOpenMode(OpenMode)}. */
  public OpenMode getOpenMode() {
    return openMode;
  }
  
  /**
   * Returns the {@link IndexDeletionPolicy} specified in
   * {@link IndexWriterConfig#setIndexDeletionPolicy(IndexDeletionPolicy)} or
   * the default {@link KeepOnlyLastCommitDeletionPolicy}.
   */
  public IndexDeletionPolicy getIndexDeletionPolicy() {
    return delPolicy;
  }
  
  /**
   * Returns the {@link IndexCommit} as specified in
   * {@link IndexWriterConfig#setIndexCommit(IndexCommit)} or the default,
   * {@code null} which specifies to open the latest index commit point.
   */
  public IndexCommit getIndexCommit() {
    return commit;
  }
  
  /**
   * Expert: returns the {@link Similarity} implementation used by this
   * {@link IndexWriter}.
   */
  public Similarity getSimilarity() {
    return similarity;
  }
  
  /**
   * Returns the {@link MergeScheduler} that was set by
   * {@link IndexWriterConfig#setMergeScheduler(MergeScheduler)}.
   */
  public MergeScheduler getMergeScheduler() {
    return mergeScheduler;
  }
  
  /**
   * Returns allowed timeout when acquiring the write lock.
   * 
   * @see IndexWriterConfig#setWriteLockTimeout(long)
   */
  public long getWriteLockTimeout() {
    return writeLockTimeout;
  }
  
  /** Returns the current {@link Codec}. */
  public Codec getCodec() {
    return codec;
  }

  /**
   * Returns the current MergePolicy in use by this writer.
   * 
   * @see IndexWriterConfig#setMergePolicy(MergePolicy)
   */
  public MergePolicy getMergePolicy() {
    return mergePolicy;
  }
  
  /**
   * Returns the configured {@link DocumentsWriterPerThreadPool} instance.
   * 
   * @see IndexWriterConfig#setIndexerThreadPool(DocumentsWriterPerThreadPool)
   * @return the configured {@link DocumentsWriterPerThreadPool} instance.
   */
  DocumentsWriterPerThreadPool getIndexerThreadPool() {
    return indexerThreadPool;
  }

  /**
   * Returns the max number of simultaneous threads that may be indexing
   * documents at once in IndexWriter.
   * 
   * @throws IllegalStateException
   *           if the configured thread pool is not a
   *           {@link ThreadAffinityDocumentsWriterThreadPool} and therefore has
   *           no notion of max thread states
   */
  public int getMaxThreadStates() {
    try {
      return ((ThreadAffinityDocumentsWriterThreadPool) indexerThreadPool).getMaxThreadStates();
    } catch (ClassCastException cce) {
      throw new IllegalStateException(cce);
    }
  }

  /**
   * Returns {@code true} if {@link IndexWriter} should pool readers even if
   * {@link DirectoryReader#open(IndexWriter, boolean)} has not been called.
   */
  public boolean getReaderPooling() {
    return readerPooling;
  }
  
  /**
   * Returns the indexing chain set on
   * {@link IndexWriterConfig#setIndexingChain(IndexingChain)}.
   */
  IndexingChain getIndexingChain() {
    return indexingChain;
  }
  
  /**
   * Returns the max amount of memory each {@link DocumentsWriterPerThread} can
   * consume until forcefully flushed.
   * 
   * @see IndexWriterConfig#setRAMPerThreadHardLimitMB(int)
   */
  public int getRAMPerThreadHardLimitMB() {
    return perThreadHardLimitMB;
  }
  
  /**
   * @see IndexWriterConfig#setFlushPolicy(FlushPolicy)
   */
  public FlushPolicy getFlushPolicy() {
    return flushPolicy;
  }
  
  /**
   * @see IndexWriterConfig#setInfoStream(InfoStream)
   */
  public InfoStream getInfoStream() {
    return infoStream;
  }
  
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("matchVersion=").append(matchVersion).append("\n");
    sb.append("analyzer=").append(analyzer == null ? "null" : analyzer.getClass().getName()).append("\n");
    sb.append("ramBufferSizeMB=").append(getRAMBufferSizeMB()).append("\n");
    sb.append("maxBufferedDocs=").append(getMaxBufferedDocs()).append("\n");
    sb.append("maxBufferedDeleteTerms=").append(getMaxBufferedDeleteTerms()).append("\n");
    // fixed: previously appended getMergeScheduler() here, which both mislabeled
    // the scheduler as the warmer and never reported the warmer at all
    sb.append("mergedSegmentWarmer=").append(getMergedSegmentWarmer()).append("\n");
    sb.append("readerTermsIndexDivisor=").append(getReaderTermsIndexDivisor()).append("\n");
    sb.append("termIndexInterval=").append(getTermIndexInterval()).append("\n"); // TODO: this should be private to the codec, not settable here
    sb.append("delPolicy=").append(getIndexDeletionPolicy().getClass().getName()).append("\n");
    IndexCommit commit = getIndexCommit();
    sb.append("commit=").append(commit == null ? "null" : commit).append("\n");
    sb.append("openMode=").append(getOpenMode()).append("\n");
    sb.append("similarity=").append(getSimilarity().getClass().getName()).append("\n");
    sb.append("mergeScheduler=").append(getMergeScheduler().getClass().getName()).append("\n");
    sb.append("default WRITE_LOCK_TIMEOUT=").append(IndexWriterConfig.WRITE_LOCK_TIMEOUT).append("\n");
    sb.append("writeLockTimeout=").append(getWriteLockTimeout()).append("\n");
    sb.append("codec=").append(getCodec()).append("\n");
    sb.append("infoStream=").append(getInfoStream().getClass().getName()).append("\n");
    sb.append("mergePolicy=").append(getMergePolicy()).append("\n");
    sb.append("indexerThreadPool=").append(getIndexerThreadPool()).append("\n");
    sb.append("readerPooling=").append(getReaderPooling()).append("\n");
    sb.append("flushPolicy=").append(getFlushPolicy()).append("\n");
    sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n");
    return sb.toString();
  }
}

View File

@ -294,7 +294,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
public class IndexThread extends Thread { public class IndexThread extends Thread {
IndexWriter writer; IndexWriter writer;
IndexWriterConfig iwc; LiveIndexWriterConfig iwc;
LineFileDocs docs; LineFileDocs docs;
private AtomicInteger pendingDocs; private AtomicInteger pendingDocs;
private final boolean doRandomCommit; private final boolean doRandomCommit;

View File

@ -25,8 +25,6 @@ import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
@ -114,18 +112,70 @@ public class TestIndexWriterConfig extends LuceneTestCase {
@Test @Test
public void testSettersChaining() throws Exception { public void testSettersChaining() throws Exception {
// Ensures that every setter returns IndexWriterConfig to enable easy // Ensures that every setter returns IndexWriterConfig to allow chaining.
// chaining. HashSet<String> liveSetters = new HashSet<String>();
HashSet<String> allSetters = new HashSet<String>();
for (Method m : IndexWriterConfig.class.getDeclaredMethods()) { for (Method m : IndexWriterConfig.class.getDeclaredMethods()) {
if (m.getDeclaringClass() == IndexWriterConfig.class if (m.getName().startsWith("set") && !Modifier.isStatic(m.getModifiers())) {
&& m.getName().startsWith("set") allSetters.add(m.getName());
&& !Modifier.isStatic(m.getModifiers())) { // setters overridden from LiveIndexWriterConfig are returned twice, once with
assertEquals("method " + m.getName() + " does not return IndexWriterConfig", // IndexWriterConfig return type and second with LiveIndexWriterConfig. The ones
IndexWriterConfig.class, m.getReturnType()); // from LiveIndexWriterConfig are marked 'synthetic', so just collect them and
// assert in the end that we also received them from IWC.
if (m.isSynthetic()) {
liveSetters.add(m.getName());
} else {
assertEquals("method " + m.getName() + " does not return IndexWriterConfig",
IndexWriterConfig.class, m.getReturnType());
}
} }
} }
for (String setter : liveSetters) {
assertTrue("setter method not overridden by IndexWriterConfig: " + setter, allSetters.contains(setter));
}
} }
@Test
public void testReuse() throws Exception {
Directory dir = newDirectory();
// test that if the same IWC is reused across two IWs, it is cloned by each.
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, conf);
LiveIndexWriterConfig liveConf1 = iw.w.getConfig();
iw.close();
iw = new RandomIndexWriter(random(), dir, conf);
LiveIndexWriterConfig liveConf2 = iw.w.getConfig();
iw.close();
// LiveIndexWriterConfig's "copy" constructor doesn't clone objects.
assertNotSame("IndexWriterConfig should have been cloned", liveConf1.getMergePolicy(), liveConf2.getMergePolicy());
dir.close();
}
@Test
public void testOverrideGetters() throws Exception {
// Test that IndexWriterConfig overrides all getters, so that javadocs
// contain all methods for the users. Also, ensures that IndexWriterConfig
// doesn't declare getters that are not declared on LiveIWC.
HashSet<String> liveGetters = new HashSet<String>();
for (Method m : LiveIndexWriterConfig.class.getDeclaredMethods()) {
if (m.getName().startsWith("get") && !Modifier.isStatic(m.getModifiers())) {
liveGetters.add(m.getName());
}
}
for (Method m : IndexWriterConfig.class.getDeclaredMethods()) {
if (m.getName().startsWith("get") && !Modifier.isStatic(m.getModifiers())) {
assertEquals("method " + m.getName() + " not overrided by IndexWriterConfig",
IndexWriterConfig.class, m.getDeclaringClass());
assertTrue("method " + m.getName() + " not declared on LiveIndexWriterConfig",
liveGetters.contains(m.getName()));
}
}
}
@Test @Test
public void testConstants() throws Exception { public void testConstants() throws Exception {
// Tests that the values of the constants does not change // Tests that the values of the constants does not change
@ -276,53 +326,4 @@ public class TestIndexWriterConfig extends LuceneTestCase {
assertEquals(LogByteSizeMergePolicy.class, conf.getMergePolicy().getClass()); assertEquals(LogByteSizeMergePolicy.class, conf.getMergePolicy().getClass());
} }
public void testReuse() throws Exception {
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
Directory dir = newDirectory();
Document doc = new Document();
doc.add(newTextField("foo", "bar", Store.YES));
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);
riw.addDocument(doc);
riw.close();
// Sharing IWC should be fine:
riw = new RandomIndexWriter(random(), dir, iwc);
riw.addDocument(doc);
riw.close();
dir.close();
}
public void testIWCClone() throws Exception {
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
Directory dir = newDirectory();
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);
// Cannot clone IW's private IWC clone:
try {
riw.w.getConfig().clone();
fail("did not hit expected exception");
} catch (IllegalStateException ise) {
// expected
}
riw.close();
dir.close();
}
public void testIWCInvalidReuse() throws Exception {
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
Directory dir = newDirectory();
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);
IndexWriterConfig privateIWC = riw.w.getConfig();
riw.close();
// Cannot clone IW's private IWC clone:
try {
new RandomIndexWriter(random(), dir, privateIWC);
fail("did not hit expected exception");
} catch (IllegalStateException ise) {
// expected
}
dir.close();
}
} }