diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index de6f8fbc2e6..73437751cac 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -10,6 +10,12 @@ New features * LUCENE-4108: add replaceTaxonomy to DirectoryTaxonomyWriter, which replaces the taxonomy in place with the given one. (Shai Erera) + +API Changes + +* LUCENE-4132: IndexWriter.getConfig() now returns a LiveIndexWriterConfig object + which can be used to change the IndexWriter's live settings. IndexWriterConfig + is used only for initializing the IndexWriter. (Shai Erera) ======================= Lucene 4.0.0-ALPHA ======================= diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java index 25e32dd6799..1dc05e46730 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java @@ -133,7 +133,7 @@ final class DocumentsWriter { final DocumentsWriterFlushControl flushControl; final Codec codec; - DocumentsWriter(Codec codec, IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumbers globalFieldNumbers, + DocumentsWriter(Codec codec, LiveIndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumbers globalFieldNumbers, BufferedDeletesStream bufferedDeletesStream) throws IOException { this.codec = codec; this.directory = directory; diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java index 07256773899..ff38d7d0c97 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java @@ -65,10 +65,9 @@ final class DocumentsWriterFlushControl implements MemoryController { private final FlushPolicy flushPolicy; private boolean closed = false; private final 
DocumentsWriter documentsWriter; - private final IndexWriterConfig config; + private final LiveIndexWriterConfig config; - DocumentsWriterFlushControl(DocumentsWriter documentsWriter, - IndexWriterConfig config) { + DocumentsWriterFlushControl(DocumentsWriter documentsWriter, LiveIndexWriterConfig config) { this.stallControl = new DocumentsWriterStallControl(); this.perThreadPool = documentsWriter.perThreadPool; this.flushPolicy = documentsWriter.flushPolicy; diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java index f82e7e5f235..a7a208ff65a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java @@ -135,7 +135,7 @@ abstract class DocumentsWriterPerThreadPool implements Cloneable { numThreadStatesActive = 0; } - void initialize(DocumentsWriter documentsWriter, FieldNumbers globalFieldMap, IndexWriterConfig config) { + void initialize(DocumentsWriter documentsWriter, FieldNumbers globalFieldMap, LiveIndexWriterConfig config) { this.documentsWriter.set(documentsWriter); // thread pool is bound to DW this.globalFieldMap.set(globalFieldMap); for (int i = 0; i < threadStates.length; i++) { diff --git a/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java b/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java index 88c777ad577..9645479b4bf 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java @@ -52,7 +52,7 @@ import org.apache.lucene.util.SetOnce; */ abstract class FlushPolicy implements Cloneable { protected SetOnce writer = new SetOnce(); - protected IndexWriterConfig indexWriterConfig; + protected LiveIndexWriterConfig indexWriterConfig; /** * Called for each delete term. 
If this is a delete triggered due to an update diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index daaaa32082b..eb46001853b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -258,7 +258,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { // The instance that was passed to the constructor. It is saved only in order // to allow users to query an IndexWriter settings. - private final IndexWriterConfig config; + private final LiveIndexWriterConfig config; // The PayloadProcessorProvider to use when segments are merged private PayloadProcessorProvider payloadProcessorProvider; @@ -586,11 +586,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { */ public IndexWriter(Directory d, IndexWriterConfig conf) throws CorruptIndexException, LockObtainFailedException, IOException { - if (conf.inUseByIndexWriter.get()) { - throw new IllegalStateException("the provided IndexWriterConfig was previously used by a different IndexWriter; please make a new one instead"); - } - config = conf.clone(); - config.inUseByIndexWriter.set(true); + config = new LiveIndexWriterConfig(conf.clone()); directory = d; analyzer = config.getAnalyzer(); infoStream = config.getInfoStream(); @@ -757,17 +753,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { } /** - * Returns the private {@link IndexWriterConfig}, cloned - * from the {@link IndexWriterConfig} passed to - * {@link #IndexWriter(Directory, IndexWriterConfig)}. - *

- * NOTE: some settings may be changed on the - * returned {@link IndexWriterConfig}, and will take - * effect in the current IndexWriter instance. See the - * javadocs for the specific setters in {@link - * IndexWriterConfig} for details. + * Returns a {@link LiveIndexWriterConfig}, which can be used to query the IndexWriter + * current settings, as well as modify "live" ones. */ - public IndexWriterConfig getConfig() { + public LiveIndexWriterConfig getConfig() { ensureOpen(false); return config; } diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java index bc69dabef27..2cec18c1f3f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java @@ -18,7 +18,6 @@ package org.apache.lucene.index; */ import java.io.PrintStream; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; @@ -28,17 +27,14 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.PrintStreamInfoStream; -import org.apache.lucene.util.SetOnce; import org.apache.lucene.util.Version; /** - * Holds all the configuration of {@link IndexWriter}. You - * should instantiate this class, call the setters to set - * your configuration, then pass it to {@link IndexWriter}. - * Note that {@link IndexWriter} makes a private clone; if - * you need to subsequently change settings use {@link - * IndexWriter#getConfig}. - * + * Holds all the configuration that is used to create an {@link IndexWriter}. + * Once {@link IndexWriter} has been created with this object, changes to this + * object will not affect the {@link IndexWriter} instance. 
For that, use + * {@link LiveIndexWriterConfig} that is returned from {@link IndexWriter#getConfig()}. + * *

* All setter methods return {@link IndexWriterConfig} to allow chaining * settings conveniently, for example: @@ -47,10 +43,12 @@ import org.apache.lucene.util.Version; * IndexWriterConfig conf = new IndexWriterConfig(analyzer); * conf.setter1().setter2(); * - * + * + * @see IndexWriter#getConfig() + * * @since 3.1 */ -public final class IndexWriterConfig implements Cloneable { +public final class IndexWriterConfig extends LiveIndexWriterConfig implements Cloneable { /** * Specifies the open mode for {@link IndexWriter}. @@ -131,33 +129,6 @@ public final class IndexWriterConfig implements Cloneable { return WRITE_LOCK_TIMEOUT; } - private final Analyzer analyzer; - private volatile IndexDeletionPolicy delPolicy; - private volatile IndexCommit commit; - private volatile OpenMode openMode; - private volatile Similarity similarity; - private volatile int termIndexInterval; // TODO: this should be private to the codec, not settable here - private volatile MergeScheduler mergeScheduler; - private volatile long writeLockTimeout; - private volatile int maxBufferedDeleteTerms; - private volatile double ramBufferSizeMB; - private volatile int maxBufferedDocs; - private volatile IndexingChain indexingChain; - private volatile IndexReaderWarmer mergedSegmentWarmer; - private volatile Codec codec; - private volatile InfoStream infoStream; - private volatile MergePolicy mergePolicy; - private volatile DocumentsWriterPerThreadPool indexerThreadPool; - private volatile boolean readerPooling; - private volatile int readerTermsIndexDivisor; - private volatile FlushPolicy flushPolicy; - private volatile int perThreadHardLimitMB; - - private Version matchVersion; - - // Used directly by IndexWriter: - AtomicBoolean inUseByIndexWriter = new AtomicBoolean(); - /** * Creates a new config that with defaults that match the specified * {@link Version} as well as the default {@link @@ -170,59 +141,27 @@ public final class IndexWriterConfig implements Cloneable { * {@link LogDocMergePolicy}. 
*/ public IndexWriterConfig(Version matchVersion, Analyzer analyzer) { - this.matchVersion = matchVersion; - this.analyzer = analyzer; - delPolicy = new KeepOnlyLastCommitDeletionPolicy(); - commit = null; - openMode = OpenMode.CREATE_OR_APPEND; - similarity = IndexSearcher.getDefaultSimilarity(); - termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here - mergeScheduler = new ConcurrentMergeScheduler(); - writeLockTimeout = WRITE_LOCK_TIMEOUT; - maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS; - ramBufferSizeMB = DEFAULT_RAM_BUFFER_SIZE_MB; - maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS; - indexingChain = DocumentsWriterPerThread.defaultIndexingChain; - mergedSegmentWarmer = null; - codec = Codec.getDefault(); - infoStream = InfoStream.getDefault(); - mergePolicy = new TieredMergePolicy(); - flushPolicy = new FlushByRamOrCountsPolicy(); - readerPooling = DEFAULT_READER_POOLING; - indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(DEFAULT_MAX_THREAD_STATES); - readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR; - perThreadHardLimitMB = DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB; + super(analyzer, matchVersion); } @Override public IndexWriterConfig clone() { - IndexWriterConfig clone; - if (inUseByIndexWriter.get()) { - throw new IllegalStateException("cannot clone: this IndexWriterConfig is private to IndexWriter; make a new one instead"); - } try { - clone = (IndexWriterConfig) super.clone(); + IndexWriterConfig clone = (IndexWriterConfig) super.clone(); + + // Mostly shallow clone, but do a deepish clone of + // certain objects that have state that cannot be shared + // across IW instances: + clone.flushPolicy = flushPolicy.clone(); + clone.indexerThreadPool = indexerThreadPool.clone(); + clone.mergePolicy = mergePolicy.clone(); + + return clone; } catch (CloneNotSupportedException e) { - // should not happen throw new RuntimeException(e); } - - // Mostly shallow clone, but do a 
deepish clone of - // certain objects that have state that cannot be shared - // across IW instances: - clone.inUseByIndexWriter = new AtomicBoolean(); - clone.flushPolicy = flushPolicy.clone(); - clone.indexerThreadPool = indexerThreadPool.clone(); - clone.mergePolicy = mergePolicy.clone(); - - return clone; } - - /** Returns the default analyzer to use for indexing documents. */ - public Analyzer getAnalyzer() { - return analyzer; - } - + /** Specifies {@link OpenMode} of the index. * *

Only takes effect when IndexWriter is first created. */ @@ -231,7 +170,7 @@ public final class IndexWriterConfig implements Cloneable { return this; } - /** Returns the {@link OpenMode} set by {@link #setOpenMode(OpenMode)}. */ + @Override public OpenMode getOpenMode() { return openMode; } @@ -258,11 +197,7 @@ public final class IndexWriterConfig implements Cloneable { return this; } - /** - * Returns the {@link IndexDeletionPolicy} specified in - * {@link #setIndexDeletionPolicy(IndexDeletionPolicy)} or the default - * {@link KeepOnlyLastCommitDeletionPolicy}/ - */ + @Override public IndexDeletionPolicy getIndexDeletionPolicy() { return delPolicy; } @@ -277,11 +212,7 @@ public final class IndexWriterConfig implements Cloneable { return this; } - /** - * Returns the {@link IndexCommit} as specified in - * {@link #setIndexCommit(IndexCommit)} or the default, null - * which specifies to open the latest index commit point. - */ + @Override public IndexCommit getIndexCommit() { return commit; } @@ -298,52 +229,11 @@ public final class IndexWriterConfig implements Cloneable { return this; } - /** - * Expert: returns the {@link Similarity} implementation used by this - * IndexWriter. - */ + @Override public Similarity getSimilarity() { return similarity; } - /** - * Expert: set the interval between indexed terms. Large values cause less - * memory to be used by IndexReader, but slow random-access to terms. Small - * values cause more memory to be used by an IndexReader, and speed - * random-access to terms. - *

- * This parameter determines the amount of computation required per query - * term, regardless of the number of documents that contain that term. In - * particular, it is the maximum number of other terms that must be scanned - * before a term is located and its frequency and position information may be - * processed. In a large index with user-entered query terms, query processing - * time is likely to be dominated not by term lookup but rather by the - * processing of frequency and positional data. In a small index or when many - * uncommon query terms are generated (e.g., by wildcard queries) term lookup - * may become a dominant cost. - *

- * In particular, numUniqueTerms/interval terms are read into - * memory by an IndexReader, and, on average, interval/2 terms - * must be scanned for each random term access. - * - * @see #DEFAULT_TERM_INDEX_INTERVAL - * - *

Takes effect immediately, but only applies to newly - * flushed/merged segments. */ - public IndexWriterConfig setTermIndexInterval(int interval) { // TODO: this should be private to the codec, not settable here - this.termIndexInterval = interval; - return this; - } - - /** - * Returns the interval between indexed terms. - * - * @see #setTermIndexInterval(int) - */ - public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here - return termIndexInterval; - } - /** * Expert: sets the merge scheduler used by this writer. The default is * {@link ConcurrentMergeScheduler}. @@ -357,10 +247,7 @@ public final class IndexWriterConfig implements Cloneable { return this; } - /** - * Returns the {@link MergeScheduler} that was set by - * {@link #setMergeScheduler(MergeScheduler)} - */ + @Override public MergeScheduler getMergeScheduler() { return mergeScheduler; } @@ -376,173 +263,11 @@ public final class IndexWriterConfig implements Cloneable { return this; } - /** - * Returns allowed timeout when acquiring the write lock. - * - * @see #setWriteLockTimeout(long) - */ + @Override public long getWriteLockTimeout() { return writeLockTimeout; } - /** - * Determines the minimal number of delete terms required before the buffered - * in-memory delete terms and queries are applied and flushed. - *

Disabled by default (writer flushes by RAM usage).

- *

- * NOTE: This setting won't trigger a segment flush. - *

- * - * @throws IllegalArgumentException if maxBufferedDeleteTerms - * is enabled but smaller than 1 - * @see #setRAMBufferSizeMB - * @see #setFlushPolicy(FlushPolicy) - * - *

Takes effect immediately, but only the next time a - * document is added, updated or deleted. - */ - public IndexWriterConfig setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) { - if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH - && maxBufferedDeleteTerms < 1) - throw new IllegalArgumentException( - "maxBufferedDeleteTerms must at least be 1 when enabled"); - this.maxBufferedDeleteTerms = maxBufferedDeleteTerms; - return this; - } - - /** - * Returns the number of buffered deleted terms that will trigger a flush of all - * buffered deletes if enabled. - * - * @see #setMaxBufferedDeleteTerms(int) - */ - public int getMaxBufferedDeleteTerms() { - return maxBufferedDeleteTerms; - } - - /** - * Determines the amount of RAM that may be used for buffering added documents - * and deletions before they are flushed to the Directory. Generally for - * faster indexing performance it's best to flush by RAM usage instead of - * document count and use as large a RAM buffer as you can. - *

- * When this is set, the writer will flush whenever buffered documents and - * deletions use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent - * triggering a flush due to RAM usage. Note that if flushing by document - * count is also enabled, then the flush will be triggered by whichever comes - * first. - *

- * The maximum RAM limit is inherently determined by the JVMs available memory. - * Yet, an {@link IndexWriter} session can consume a significantly larger amount - * of memory than the given RAM limit since this limit is just an indicator when - * to flush memory resident documents to the Directory. Flushes are likely happen - * concurrently while other threads adding documents to the writer. For application - * stability the available memory in the JVM should be significantly larger than - * the RAM buffer used for indexing. - *

- * NOTE: the account of RAM usage for pending deletions is only - * approximate. Specifically, if you delete by Query, Lucene currently has no - * way to measure the RAM usage of individual Queries so the accounting will - * under-estimate and you should compensate by either calling commit() - * periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)} - * to flush and apply buffered deletes by count instead of RAM usage - * (for each buffered delete Query a constant number of bytes is used to estimate - * RAM usage). Note that enabling {@link #setMaxBufferedDeleteTerms(int)} will - * not trigger any segment flushes. - *

- * NOTE: It's not guaranteed that all memory resident documents are flushed - * once this limit is exceeded. Depending on the configured {@link FlushPolicy} only a - * subset of the buffered documents are flushed and therefore only parts of the RAM - * buffer is released. - *

- * - * The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}. - * @see #setFlushPolicy(FlushPolicy) - * @see #setRAMPerThreadHardLimitMB(int) - * - *

Takes effect immediately, but only the next time a - * document is added, updated or deleted. - * - * @throws IllegalArgumentException - * if ramBufferSize is enabled but non-positive, or it disables - * ramBufferSize when maxBufferedDocs is already disabled - * - */ - public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) { - if (ramBufferSizeMB != DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0) - throw new IllegalArgumentException( - "ramBufferSize should be > 0.0 MB when enabled"); - if (ramBufferSizeMB == DISABLE_AUTO_FLUSH && maxBufferedDocs == DISABLE_AUTO_FLUSH) - throw new IllegalArgumentException( - "at least one of ramBufferSize and maxBufferedDocs must be enabled"); - this.ramBufferSizeMB = ramBufferSizeMB; - return this; - } - - /** Returns the value set by {@link #setRAMBufferSizeMB(double)} if enabled. */ - public double getRAMBufferSizeMB() { - return ramBufferSizeMB; - } - - /** - * Determines the minimal number of documents required before the buffered - * in-memory documents are flushed as a new Segment. Large values generally - * give faster indexing. - * - *

- * When this is set, the writer will flush every maxBufferedDocs added - * documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a - * flush due to number of buffered documents. Note that if flushing by RAM - * usage is also enabled, then the flush will be triggered by whichever comes - * first. - * - *

- * Disabled by default (writer flushes by RAM usage). - * - *

Takes effect immediately, but only the next time a - * document is added, updated or deleted. - * - * @see #setRAMBufferSizeMB(double) - * @see #setFlushPolicy(FlushPolicy) - * @throws IllegalArgumentException - * if maxBufferedDocs is enabled but smaller than 2, or it disables - * maxBufferedDocs when ramBufferSize is already disabled - */ - public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) { - if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2) - throw new IllegalArgumentException( - "maxBufferedDocs must at least be 2 when enabled"); - if (maxBufferedDocs == DISABLE_AUTO_FLUSH - && ramBufferSizeMB == DISABLE_AUTO_FLUSH) - throw new IllegalArgumentException( - "at least one of ramBufferSize and maxBufferedDocs must be enabled"); - this.maxBufferedDocs = maxBufferedDocs; - return this; - } - - /** - * Returns the number of buffered added documents that will trigger a flush if - * enabled. - * - * @see #setMaxBufferedDocs(int) - */ - public int getMaxBufferedDocs() { - return maxBufferedDocs; - } - - /** Set the merged segment warmer. See {@link IndexReaderWarmer}. - * - *

Takes effect on the next merge. */ - public IndexWriterConfig setMergedSegmentWarmer(IndexReaderWarmer mergeSegmentWarmer) { - this.mergedSegmentWarmer = mergeSegmentWarmer; - return this; - } - - /** Returns the current merged segment warmer. See {@link IndexReaderWarmer}. */ - public IndexReaderWarmer getMergedSegmentWarmer() { - return mergedSegmentWarmer; - } - /** * Expert: {@link MergePolicy} is invoked whenever there are changes to the * segments in the index. Its role is to select which merges to do, if any, @@ -556,25 +281,24 @@ public final class IndexWriterConfig implements Cloneable { return this; } - /** Set the Codec. See {@link Codec}. - * - *

Only takes effect when IndexWriter is first created. */ + /** + * Set the {@link Codec}. + * + *

+ * Only takes effect when IndexWriter is first created. + */ public IndexWriterConfig setCodec(Codec codec) { this.codec = codec; return this; } - /** Returns the current Codec. See {@link Codec}. */ + @Override public Codec getCodec() { return codec; } - /** - * Returns the current MergePolicy in use by this writer. - * - * @see #setMergePolicy(MergePolicy) - */ + @Override public MergePolicy getMergePolicy() { return mergePolicy; } @@ -595,17 +319,15 @@ public final class IndexWriterConfig implements Cloneable { * NOTE: This only takes effect when IndexWriter is first created.

*/ IndexWriterConfig setIndexerThreadPool(DocumentsWriterPerThreadPool threadPool) { if (threadPool == null) { - throw new IllegalArgumentException("DocumentsWriterPerThreadPool must not be nul"); + throw new IllegalArgumentException("threadPool must not be null"); } this.indexerThreadPool = threadPool; return this; } - /** Returns the configured {@link DocumentsWriterPerThreadPool} instance. - * @see #setIndexerThreadPool(DocumentsWriterPerThreadPool) - * @return the configured {@link DocumentsWriterPerThreadPool} instance.*/ + @Override DocumentsWriterPerThreadPool getIndexerThreadPool() { - return this.indexerThreadPool; + return indexerThreadPool; } /** @@ -620,8 +342,7 @@ public final class IndexWriterConfig implements Cloneable { return this; } - /** Returns the max number of simultaneous threads that - * may be indexing documents at once in IndexWriter. */ + @Override public int getMaxThreadStates() { try { return ((ThreadAffinityDocumentsWriterThreadPool) indexerThreadPool).getMaxThreadStates(); @@ -645,8 +366,7 @@ public final class IndexWriterConfig implements Cloneable { return this; } - /** Returns true if IndexWriter should pool readers even - * if {@link DirectoryReader#open(IndexWriter, boolean)} has not been called. */ + @Override public boolean getReaderPooling() { return readerPooling; } @@ -659,34 +379,11 @@ public final class IndexWriterConfig implements Cloneable { return this; } - /** Returns the indexing chain set on {@link #setIndexingChain(IndexingChain)}. */ + @Override IndexingChain getIndexingChain() { return indexingChain; } - /** Sets the termsIndexDivisor passed to any readers that - * IndexWriter opens, for example when applying deletes - * or creating a near-real-time reader in {@link - * DirectoryReader#open(IndexWriter, boolean)}. If you pass -1, the terms index - * won't be loaded by the readers. 
This is only useful in - * advanced situations when you will only .next() through - * all terms; attempts to seek will hit an exception. - * - *

Takes effect immediately, but only applies to - * readers opened after this call */ - public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) { - if (divisor <= 0 && divisor != -1) { - throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")"); - } - readerTermsIndexDivisor = divisor; - return this; - } - - /** @see #setReaderTermsIndexDivisor(int) */ - public int getReaderTermsIndexDivisor() { - return readerTermsIndexDivisor; - } - /** * Expert: Controls when segments are flushed to disk during indexing. * The {@link FlushPolicy} initialized during {@link IndexWriter} instantiation and once initialized @@ -719,28 +416,56 @@ public final class IndexWriterConfig implements Cloneable { return this; } - /** - * Returns the max amount of memory each {@link DocumentsWriterPerThread} can - * consume until forcefully flushed. - * @see #setRAMPerThreadHardLimitMB(int) - */ + @Override public int getRAMPerThreadHardLimitMB() { return perThreadHardLimitMB; } - /** - * @see #setFlushPolicy(FlushPolicy) - */ + + @Override public FlushPolicy getFlushPolicy() { return flushPolicy; } - /** - * @see #setInfoStream(InfoStream) - */ + @Override public InfoStream getInfoStream() { return infoStream; } + @Override + public Analyzer getAnalyzer() { + return super.getAnalyzer(); + } + + @Override + public int getMaxBufferedDeleteTerms() { + return super.getMaxBufferedDeleteTerms(); + } + + @Override + public int getMaxBufferedDocs() { + return super.getMaxBufferedDocs(); + } + + @Override + public IndexReaderWarmer getMergedSegmentWarmer() { + return super.getMergedSegmentWarmer(); + } + + @Override + public double getRAMBufferSizeMB() { + return super.getRAMBufferSizeMB(); + } + + @Override + public int getReaderTermsIndexDivisor() { + return super.getReaderTermsIndexDivisor(); + } + + @Override + public int getTermIndexInterval() { + return super.getTermIndexInterval(); + } + /** If non-null, information about merges, deletes and a * message when 
maxFieldLength is reached will be printed * to this. @@ -754,40 +479,39 @@ public final class IndexWriterConfig implements Cloneable { return this; } - /** - * Convenience method that uses {@link PrintStreamInfoStream} - */ + /** Convenience method that uses {@link PrintStreamInfoStream} */ public IndexWriterConfig setInfoStream(PrintStream printStream) { return setInfoStream(printStream == null ? InfoStream.NO_OUTPUT : new PrintStreamInfoStream(printStream)); } - + @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("matchVersion=").append(matchVersion).append("\n"); - sb.append("analyzer=").append(analyzer == null ? "null" : analyzer.getClass().getName()).append("\n"); - sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n"); - sb.append("commit=").append(commit == null ? "null" : commit).append("\n"); - sb.append("openMode=").append(openMode).append("\n"); - sb.append("similarity=").append(similarity.getClass().getName()).append("\n"); - sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here - sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n"); - sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n"); - sb.append("writeLockTimeout=").append(writeLockTimeout).append("\n"); - sb.append("maxBufferedDeleteTerms=").append(maxBufferedDeleteTerms).append("\n"); - sb.append("ramBufferSizeMB=").append(ramBufferSizeMB).append("\n"); - sb.append("maxBufferedDocs=").append(maxBufferedDocs).append("\n"); - sb.append("mergedSegmentWarmer=").append(mergedSegmentWarmer).append("\n"); - sb.append("codec=").append(codec).append("\n"); - sb.append("infoStream=").append(infoStream.getClass().getName()).append("\n"); - sb.append("mergePolicy=").append(mergePolicy).append("\n"); - sb.append("indexerThreadPool=").append(indexerThreadPool).append("\n"); - 
sb.append("readerPooling=").append(readerPooling).append("\n"); - sb.append("readerTermsIndexDivisor=").append(readerTermsIndexDivisor).append("\n"); - sb.append("flushPolicy=").append(flushPolicy).append("\n"); - sb.append("perThreadHardLimitMB=").append(perThreadHardLimitMB).append("\n"); - - return sb.toString(); + public IndexWriterConfig setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) { + return (IndexWriterConfig) super.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms); + } + + @Override + public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) { + return (IndexWriterConfig) super.setMaxBufferedDocs(maxBufferedDocs); + } + + @Override + public IndexWriterConfig setMergedSegmentWarmer(IndexReaderWarmer mergeSegmentWarmer) { + return (IndexWriterConfig) super.setMergedSegmentWarmer(mergeSegmentWarmer); + } + + @Override + public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) { + return (IndexWriterConfig) super.setRAMBufferSizeMB(ramBufferSizeMB); + } + + @Override + public IndexWriterConfig setReaderTermsIndexDivisor(int divisor) { + return (IndexWriterConfig) super.setReaderTermsIndexDivisor(divisor); + } + + @Override + public IndexWriterConfig setTermIndexInterval(int interval) { + return (IndexWriterConfig) super.setTermIndexInterval(interval); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java new file mode 100755 index 00000000000..7bcdff9ca55 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java @@ -0,0 +1,505 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; +import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.Version; + +/** + * Holds all the configuration used by {@link IndexWriter} with few setters for + * settings that can be changed on an {@link IndexWriter} instance "live". 
+ * + * @since 4.0 + */ +public class LiveIndexWriterConfig { + + private final Analyzer analyzer; + + private volatile int maxBufferedDocs; + private volatile double ramBufferSizeMB; + private volatile int maxBufferedDeleteTerms; + private volatile int readerTermsIndexDivisor; + private volatile IndexReaderWarmer mergedSegmentWarmer; + private volatile int termIndexInterval; // TODO: this should be private to the codec, not settable here + + // modified by IndexWriterConfig + protected volatile IndexDeletionPolicy delPolicy; + protected volatile IndexCommit commit; + protected volatile OpenMode openMode; + protected volatile Similarity similarity; + protected volatile MergeScheduler mergeScheduler; + protected volatile long writeLockTimeout; + protected volatile IndexingChain indexingChain; + protected volatile Codec codec; + protected volatile InfoStream infoStream; + protected volatile MergePolicy mergePolicy; + protected volatile DocumentsWriterPerThreadPool indexerThreadPool; + protected volatile boolean readerPooling; + protected volatile FlushPolicy flushPolicy; + protected volatile int perThreadHardLimitMB; + + protected final Version matchVersion; + + // used by IndexWriterConfig + LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) { + this.analyzer = analyzer; + this.matchVersion = matchVersion; + ramBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB; + maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS; + maxBufferedDeleteTerms = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS; + readerTermsIndexDivisor = IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR; + mergedSegmentWarmer = null; + termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here + delPolicy = new KeepOnlyLastCommitDeletionPolicy(); + commit = null; + openMode = OpenMode.CREATE_OR_APPEND; + similarity = IndexSearcher.getDefaultSimilarity(); + mergeScheduler = new 
ConcurrentMergeScheduler(); + writeLockTimeout = IndexWriterConfig.WRITE_LOCK_TIMEOUT; + indexingChain = DocumentsWriterPerThread.defaultIndexingChain; + codec = Codec.getDefault(); + infoStream = InfoStream.getDefault(); + mergePolicy = new TieredMergePolicy(); + flushPolicy = new FlushByRamOrCountsPolicy(); + readerPooling = IndexWriterConfig.DEFAULT_READER_POOLING; + indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES); + perThreadHardLimitMB = IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB; + } + + /** + * Creates a new config that handles the live {@link IndexWriter} + * settings. + */ + LiveIndexWriterConfig(IndexWriterConfig config) { + maxBufferedDeleteTerms = config.getMaxBufferedDeleteTerms(); + maxBufferedDocs = config.getMaxBufferedDocs(); + mergedSegmentWarmer = config.getMergedSegmentWarmer(); + ramBufferSizeMB = config.getRAMBufferSizeMB(); + readerTermsIndexDivisor = config.getReaderTermsIndexDivisor(); + termIndexInterval = config.getTermIndexInterval(); + matchVersion = config.matchVersion; + analyzer = config.getAnalyzer(); + delPolicy = config.getIndexDeletionPolicy(); + commit = config.getIndexCommit(); + openMode = config.getOpenMode(); + similarity = config.getSimilarity(); + mergeScheduler = config.getMergeScheduler(); + writeLockTimeout = config.getWriteLockTimeout(); + indexingChain = config.getIndexingChain(); + codec = config.getCodec(); + infoStream = config.getInfoStream(); + mergePolicy = config.getMergePolicy(); + indexerThreadPool = config.getIndexerThreadPool(); + readerPooling = config.getReaderPooling(); + flushPolicy = config.getFlushPolicy(); + perThreadHardLimitMB = config.getRAMPerThreadHardLimitMB(); + } + + /** Returns the default analyzer to use for indexing documents. */ + public Analyzer getAnalyzer() { + return analyzer; + } + + /** + * Expert: set the interval between indexed terms. 
Large values cause less + * memory to be used by IndexReader, but slow random-access to terms. Small + * values cause more memory to be used by an IndexReader, and speed + * random-access to terms. + *

+ * This parameter determines the amount of computation required per query + * term, regardless of the number of documents that contain that term. In + * particular, it is the maximum number of other terms that must be scanned + * before a term is located and its frequency and position information may be + * processed. In a large index with user-entered query terms, query processing + * time is likely to be dominated not by term lookup but rather by the + * processing of frequency and positional data. In a small index or when many + * uncommon query terms are generated (e.g., by wildcard queries) term lookup + * may become a dominant cost. + *

+ * In particular, numUniqueTerms/interval terms are read into + * memory by an IndexReader, and, on average, interval/2 terms + * must be scanned for each random term access. + * + *

+ * Takes effect immediately, but only applies to newly flushed/merged + * segments. + * + * @see IndexWriterConfig#DEFAULT_TERM_INDEX_INTERVAL + */ + public LiveIndexWriterConfig setTermIndexInterval(int interval) { // TODO: this should be private to the codec, not settable here + this.termIndexInterval = interval; + return this; + } + + /** + * Returns the interval between indexed terms. + * + * @see #setTermIndexInterval(int) + */ + public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here + return termIndexInterval; + } + + /** + * Determines the minimal number of delete terms required before the buffered + * in-memory delete terms and queries are applied and flushed. + *

+ * Disabled by default (writer flushes by RAM usage). + *

+ * NOTE: This setting won't trigger a segment flush. + * + *

+ * Takes effect immediately, but only the next time a document is added, + * updated or deleted. + * + * @throws IllegalArgumentException + * if maxBufferedDeleteTerms is enabled but smaller than 1 + * + * @see #setRAMBufferSizeMB + */ + public LiveIndexWriterConfig setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) { + if (maxBufferedDeleteTerms != IndexWriterConfig.DISABLE_AUTO_FLUSH && maxBufferedDeleteTerms < 1) { + throw new IllegalArgumentException("maxBufferedDeleteTerms must at least be 1 when enabled"); + } + this.maxBufferedDeleteTerms = maxBufferedDeleteTerms; + return this; + } + + /** + * Returns the number of buffered deleted terms that will trigger a flush of all + * buffered deletes if enabled. + * + * @see #setMaxBufferedDeleteTerms(int) + */ + public int getMaxBufferedDeleteTerms() { + return maxBufferedDeleteTerms; + } + + /** + * Determines the amount of RAM that may be used for buffering added documents + * and deletions before they are flushed to the Directory. Generally for + * faster indexing performance it's best to flush by RAM usage instead of + * document count and use as large a RAM buffer as you can. + *

+ * When this is set, the writer will flush whenever buffered documents and + * deletions use this much RAM. Pass in + * {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} to prevent triggering a flush + * due to RAM usage. Note that if flushing by document count is also enabled, + * then the flush will be triggered by whichever comes first. + *

+ * The maximum RAM limit is inherently determined by the JVMs available + * memory. Yet, an {@link IndexWriter} session can consume a significantly + * larger amount of memory than the given RAM limit since this limit is just + * an indicator when to flush memory resident documents to the Directory. + * Flushes are likely happen concurrently while other threads adding documents + * to the writer. For application stability the available memory in the JVM + * should be significantly larger than the RAM buffer used for indexing. + *

+ * NOTE: the account of RAM usage for pending deletions is only + * approximate. Specifically, if you delete by Query, Lucene currently has no + * way to measure the RAM usage of individual Queries so the accounting will + * under-estimate and you should compensate by either calling commit() + * periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)} + * to flush and apply buffered deletes by count instead of RAM usage (for each + * buffered delete Query a constant number of bytes is used to estimate RAM + * usage). Note that enabling {@link #setMaxBufferedDeleteTerms(int)} will not + * trigger any segment flushes. + *

+ * NOTE: It's not guaranteed that all memory resident documents are + * flushed once this limit is exceeded. Depending on the configured + * {@link FlushPolicy} only a subset of the buffered documents are flushed and + * therefore only parts of the RAM buffer is released. + *

+ * + * The default value is {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB}. + * + *

+ * Takes effect immediately, but only the next time a document is added, + * updated or deleted. + * + * @see IndexWriterConfig#setRAMPerThreadHardLimitMB(int) + * + * @throws IllegalArgumentException + * if ramBufferSize is enabled but non-positive, or it disables + * ramBufferSize when maxBufferedDocs is already disabled + */ + public LiveIndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) { + if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0) { + throw new IllegalArgumentException("ramBufferSize should be > 0.0 MB when enabled"); + } + if (ramBufferSizeMB == IndexWriterConfig.DISABLE_AUTO_FLUSH + && maxBufferedDocs == IndexWriterConfig.DISABLE_AUTO_FLUSH) { + throw new IllegalArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled"); + } + this.ramBufferSizeMB = ramBufferSizeMB; + return this; + } + + /** Returns the value set by {@link #setRAMBufferSizeMB(double)} if enabled. */ + public double getRAMBufferSizeMB() { + return ramBufferSizeMB; + } + + /** + * Determines the minimal number of documents required before the buffered + * in-memory documents are flushed as a new Segment. Large values generally + * give faster indexing. + * + *

+ * When this is set, the writer will flush every maxBufferedDocs added + * documents. Pass in {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} to prevent + * triggering a flush due to number of buffered documents. Note that if + * flushing by RAM usage is also enabled, then the flush will be triggered by + * whichever comes first. + * + *

+ * Disabled by default (writer flushes by RAM usage). + * + *

+ * Takes effect immediately, but only the next time a document is added, + * updated or deleted. + * + * @see #setRAMBufferSizeMB(double) + * @throws IllegalArgumentException + * if maxBufferedDocs is enabled but smaller than 2, or it disables + * maxBufferedDocs when ramBufferSize is already disabled + */ + public LiveIndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) { + if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH && maxBufferedDocs < 2) { + throw new IllegalArgumentException("maxBufferedDocs must at least be 2 when enabled"); + } + if (maxBufferedDocs == IndexWriterConfig.DISABLE_AUTO_FLUSH + && ramBufferSizeMB == IndexWriterConfig.DISABLE_AUTO_FLUSH) { + throw new IllegalArgumentException("at least one of ramBufferSize and maxBufferedDocs must be enabled"); + } + this.maxBufferedDocs = maxBufferedDocs; + return this; + } + + /** + * Returns the number of buffered added documents that will trigger a flush if + * enabled. + * + * @see #setMaxBufferedDocs(int) + */ + public int getMaxBufferedDocs() { + return maxBufferedDocs; + } + + /** + * Set the merged segment warmer. See {@link IndexReaderWarmer}. + * + *

+ * Takes effect on the next merge. + */ + public LiveIndexWriterConfig setMergedSegmentWarmer(IndexReaderWarmer mergeSegmentWarmer) { + this.mergedSegmentWarmer = mergeSegmentWarmer; + return this; + } + + /** Returns the current merged segment warmer. See {@link IndexReaderWarmer}. */ + public IndexReaderWarmer getMergedSegmentWarmer() { + return mergedSegmentWarmer; + } + + /** + * Sets the termsIndexDivisor passed to any readers that IndexWriter opens, + * for example when applying deletes or creating a near-real-time reader in + * {@link DirectoryReader#open(IndexWriter, boolean)}. If you pass -1, the + * terms index won't be loaded by the readers. This is only useful in advanced + * situations when you will only .next() through all terms; attempts to seek + * will hit an exception. + * + *

+ * Takes effect immediately, but only applies to readers opened after this + * call. + */ + public LiveIndexWriterConfig setReaderTermsIndexDivisor(int divisor) { + if (divisor <= 0 && divisor != -1) { + throw new IllegalArgumentException("divisor must be >= 1, or -1 (got " + divisor + ")"); + } + readerTermsIndexDivisor = divisor; + return this; + } + + /** @see #setReaderTermsIndexDivisor(int) */ + public int getReaderTermsIndexDivisor() { + return readerTermsIndexDivisor; + } + + /** Returns the {@link OpenMode} set by {@link IndexWriterConfig#setOpenMode(OpenMode)}. */ + public OpenMode getOpenMode() { + return openMode; + } + + /** + * Returns the {@link IndexDeletionPolicy} specified in + * {@link IndexWriterConfig#setIndexDeletionPolicy(IndexDeletionPolicy)} or + * the default {@link KeepOnlyLastCommitDeletionPolicy}. + */ + public IndexDeletionPolicy getIndexDeletionPolicy() { + return delPolicy; + } + + /** + * Returns the {@link IndexCommit} as specified in + * {@link IndexWriterConfig#setIndexCommit(IndexCommit)} or the default, + * {@code null} which specifies to open the latest index commit point. + */ + public IndexCommit getIndexCommit() { + return commit; + } + + /** + * Expert: returns the {@link Similarity} implementation used by this + * {@link IndexWriter}. + */ + public Similarity getSimilarity() { + return similarity; + } + + /** + * Returns the {@link MergeScheduler} that was set by + * {@link IndexWriterConfig#setMergeScheduler(MergeScheduler)}. + */ + public MergeScheduler getMergeScheduler() { + return mergeScheduler; + } + + /** + * Returns allowed timeout when acquiring the write lock. + * + * @see IndexWriterConfig#setWriteLockTimeout(long) + */ + public long getWriteLockTimeout() { + return writeLockTimeout; + } + + /** Returns the current {@link Codec}. */ + public Codec getCodec() { + return codec; + } + + /** + * Returns the current MergePolicy in use by this writer. 
+ * + * @see IndexWriterConfig#setMergePolicy(MergePolicy) + */ + public MergePolicy getMergePolicy() { + return mergePolicy; + } + + /** + * Returns the configured {@link DocumentsWriterPerThreadPool} instance. + * + * @see IndexWriterConfig#setIndexerThreadPool(DocumentsWriterPerThreadPool) + * @return the configured {@link DocumentsWriterPerThreadPool} instance. + */ + DocumentsWriterPerThreadPool getIndexerThreadPool() { + return indexerThreadPool; + } + + /** + * Returns the max number of simultaneous threads that may be indexing + * documents at once in IndexWriter. + */ + public int getMaxThreadStates() { + try { + return ((ThreadAffinityDocumentsWriterThreadPool) indexerThreadPool).getMaxThreadStates(); + } catch (ClassCastException cce) { + throw new IllegalStateException(cce); + } + } + + /** + * Returns {@code true} if {@link IndexWriter} should pool readers even if + * {@link DirectoryReader#open(IndexWriter, boolean)} has not been called. + */ + public boolean getReaderPooling() { + return readerPooling; + } + + /** + * Returns the indexing chain set on + * {@link IndexWriterConfig#setIndexingChain(IndexingChain)}. + */ + IndexingChain getIndexingChain() { + return indexingChain; + } + + /** + * Returns the max amount of memory each {@link DocumentsWriterPerThread} can + * consume until forcefully flushed. + * + * @see IndexWriterConfig#setRAMPerThreadHardLimitMB(int) + */ + public int getRAMPerThreadHardLimitMB() { + return perThreadHardLimitMB; + } + + /** + * @see IndexWriterConfig#setFlushPolicy(FlushPolicy) + */ + public FlushPolicy getFlushPolicy() { + return flushPolicy; + } + + /** + * @see IndexWriterConfig#setInfoStream(InfoStream) + */ + public InfoStream getInfoStream() { + return infoStream; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("matchVersion=").append(matchVersion).append("\n"); + sb.append("analyzer=").append(analyzer == null ? 
"null" : analyzer.getClass().getName()).append("\n"); + sb.append("ramBufferSizeMB=").append(getRAMBufferSizeMB()).append("\n"); + sb.append("maxBufferedDocs=").append(getMaxBufferedDocs()).append("\n"); + sb.append("maxBufferedDeleteTerms=").append(getMaxBufferedDeleteTerms()).append("\n"); + sb.append("mergedSegmentWarmer=").append(getMergedSegmentWarmer()).append("\n"); + sb.append("readerTermsIndexDivisor=").append(getReaderTermsIndexDivisor()).append("\n"); + sb.append("termIndexInterval=").append(getTermIndexInterval()).append("\n"); // TODO: this should be private to the codec, not settable here + sb.append("delPolicy=").append(getIndexDeletionPolicy().getClass().getName()).append("\n"); + IndexCommit commit = getIndexCommit(); + sb.append("commit=").append(commit == null ? "null" : commit).append("\n"); + sb.append("openMode=").append(getOpenMode()).append("\n"); + sb.append("similarity=").append(getSimilarity().getClass().getName()).append("\n"); + sb.append("mergeScheduler=").append(getMergeScheduler().getClass().getName()).append("\n"); + sb.append("default WRITE_LOCK_TIMEOUT=").append(IndexWriterConfig.WRITE_LOCK_TIMEOUT).append("\n"); + sb.append("writeLockTimeout=").append(getWriteLockTimeout()).append("\n"); + sb.append("codec=").append(getCodec()).append("\n"); + sb.append("infoStream=").append(getInfoStream().getClass().getName()).append("\n"); + sb.append("mergePolicy=").append(getMergePolicy()).append("\n"); + sb.append("indexerThreadPool=").append(getIndexerThreadPool()).append("\n"); + sb.append("readerPooling=").append(getReaderPooling()).append("\n"); + sb.append("flushPolicy=").append(getFlushPolicy()).append("\n"); + sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n"); + return sb.toString(); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java index bb6cdfb728b..d6a8a8d46ad 100644 --- 
a/lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java @@ -294,7 +294,7 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase { public class IndexThread extends Thread { IndexWriter writer; - IndexWriterConfig iwc; + LiveIndexWriterConfig iwc; LineFileDocs docs; private AtomicInteger pendingDocs; private final boolean doRandomCommit; diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterConfig.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterConfig.java index 4ef74da04df..d912d817245 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterConfig.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterConfig.java @@ -25,8 +25,6 @@ import java.util.Set; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.IndexSearcher; @@ -114,18 +112,70 @@ public class TestIndexWriterConfig extends LuceneTestCase { @Test public void testSettersChaining() throws Exception { - // Ensures that every setter returns IndexWriterConfig to enable easy - // chaining. + // Ensures that every setter returns IndexWriterConfig to allow chaining. 
+ HashSet liveSetters = new HashSet(); + HashSet allSetters = new HashSet(); for (Method m : IndexWriterConfig.class.getDeclaredMethods()) { - if (m.getDeclaringClass() == IndexWriterConfig.class - && m.getName().startsWith("set") - && !Modifier.isStatic(m.getModifiers())) { - assertEquals("method " + m.getName() + " does not return IndexWriterConfig", - IndexWriterConfig.class, m.getReturnType()); + if (m.getName().startsWith("set") && !Modifier.isStatic(m.getModifiers())) { + allSetters.add(m.getName()); + // setters overridden from LiveIndexWriterConfig are returned twice, once with + // IndexWriterConfig return type and second with LiveIndexWriterConfig. The ones + // from LiveIndexWriterConfig are marked 'synthetic', so just collect them and + // assert in the end that we also received them from IWC. + if (m.isSynthetic()) { + liveSetters.add(m.getName()); + } else { + assertEquals("method " + m.getName() + " does not return IndexWriterConfig", + IndexWriterConfig.class, m.getReturnType()); + } } } + for (String setter : liveSetters) { + assertTrue("setter method not overridden by IndexWriterConfig: " + setter, allSetters.contains(setter)); + } } + @Test + public void testReuse() throws Exception { + Directory dir = newDirectory(); + // test that if the same IWC is reused across two IWs, it is cloned by each. + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, conf); + LiveIndexWriterConfig liveConf1 = iw.w.getConfig(); + iw.close(); + + iw = new RandomIndexWriter(random(), dir, conf); + LiveIndexWriterConfig liveConf2 = iw.w.getConfig(); + iw.close(); + + // LiveIndexWriterConfig's "copy" constructor doesn't clone objects. 
+ assertNotSame("IndexWriterConfig should have been cloned", liveConf1.getMergePolicy(), liveConf2.getMergePolicy()); + + dir.close(); + } + + @Test + public void testOverrideGetters() throws Exception { + // Test that IndexWriterConfig overrides all getters, so that javadocs + // contain all methods for the users. Also, ensures that IndexWriterConfig + // doesn't declare getters that are not declared on LiveIWC. + HashSet liveGetters = new HashSet(); + for (Method m : LiveIndexWriterConfig.class.getDeclaredMethods()) { + if (m.getName().startsWith("get") && !Modifier.isStatic(m.getModifiers())) { + liveGetters.add(m.getName()); + } + } + + for (Method m : IndexWriterConfig.class.getDeclaredMethods()) { + if (m.getName().startsWith("get") && !Modifier.isStatic(m.getModifiers())) { + assertEquals("method " + m.getName() + " not overridden by IndexWriterConfig", + IndexWriterConfig.class, m.getDeclaringClass()); + assertTrue("method " + m.getName() + " not declared on LiveIndexWriterConfig", + liveGetters.contains(m.getName())); + } + } + } + @Test public void testConstants() throws Exception { // Tests that the values of the constants does not change @@ -276,53 +326,4 @@ assertEquals(LogByteSizeMergePolicy.class, conf.getMergePolicy().getClass()); } - public void testReuse() throws Exception { - IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - Directory dir = newDirectory(); - Document doc = new Document(); - doc.add(newTextField("foo", "bar", Store.YES)); - RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc); - riw.addDocument(doc); - riw.close(); - - // Sharing IWC should be fine: - riw = new RandomIndexWriter(random(), dir, iwc); - riw.addDocument(doc); - riw.close(); - - dir.close(); - } - - public void testIWCClone() throws Exception { - IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - 
Directory dir = newDirectory(); - RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc); - - // Cannot clone IW's private IWC clone: - try { - riw.w.getConfig().clone(); - fail("did not hit expected exception"); - } catch (IllegalStateException ise) { - // expected - } - riw.close(); - dir.close(); - } - - public void testIWCInvalidReuse() throws Exception { - IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - Directory dir = newDirectory(); - RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc); - IndexWriterConfig privateIWC = riw.w.getConfig(); - riw.close(); - - // Cannot clone IW's private IWC clone: - try { - new RandomIndexWriter(random(), dir, privateIWC); - fail("did not hit expected exception"); - } catch (IllegalStateException ise) { - // expected - } - dir.close(); - } }