mirror of https://github.com/apache/lucene.git
LUCENE-3596: DirectoryTaxonomyWriter extensions can now set internal index writer config attributes such as info stream
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206996 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4d4ec8b8c2
commit
ec660c7997
|
@ -31,6 +31,7 @@ import org.apache.lucene.index.LogByteSizeMergePolicy;
|
||||||
import org.apache.lucene.index.MultiFields;
|
import org.apache.lucene.index.MultiFields;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.index.TieredMergePolicy;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.store.AlreadyClosedException;
|
import org.apache.lucene.store.AlreadyClosedException;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
@ -191,7 +192,13 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
||||||
taxoIndexCreateTime = Long.toString(System.nanoTime());
|
taxoIndexCreateTime = Long.toString(System.nanoTime());
|
||||||
}
|
}
|
||||||
|
|
||||||
indexWriter = openIndexWriter(directory, openMode);
|
IndexWriterConfig config = createIndexWriterConfig(openMode);
|
||||||
|
indexWriter = openIndexWriter(directory, config);
|
||||||
|
|
||||||
|
// verify (to some extent) that merge policy in effect would preserve category docids
|
||||||
|
assert !(indexWriter.getConfig().getMergePolicy() instanceof TieredMergePolicy) :
|
||||||
|
"for preserving category docids, merging none-adjacent segments is not allowed";
|
||||||
|
|
||||||
reader = null;
|
reader = null;
|
||||||
|
|
||||||
FieldType ft = new FieldType(TextField.TYPE_UNSTORED);
|
FieldType ft = new FieldType(TextField.TYPE_UNSTORED);
|
||||||
|
@ -225,37 +232,53 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A hook for extensions of this class to provide their own
|
* Open internal index writer, which contains the taxonomy data.
|
||||||
* {@link IndexWriter} implementation or instance. Extending classes can
|
* <p>
|
||||||
* instantiate and configure the {@link IndexWriter} as they see fit,
|
* Extensions may provide their own {@link IndexWriter} implementation or instance.
|
||||||
* including setting a {@link org.apache.lucene.index.MergeScheduler}, or
|
* <br><b>NOTE:</b> the instance this method returns will be closed upon calling
|
||||||
* {@link org.apache.lucene.index.IndexDeletionPolicy}, different RAM size
|
|
||||||
* etc.<br>
|
|
||||||
* <b>NOTE:</b> the instance this method returns will be closed upon calling
|
|
||||||
* to {@link #close()}.
|
* to {@link #close()}.
|
||||||
|
* <br><b>NOTE:</b> the merge policy in effect must not merge none adjacent segments. See
|
||||||
|
* comment in {@link #createIndexWriterConfig(IndexWriterConfig.OpenMode)} for the logic behind this.
|
||||||
|
*
|
||||||
|
* @see #createIndexWriterConfig(IndexWriterConfig.OpenMode)
|
||||||
*
|
*
|
||||||
* @param directory
|
* @param directory
|
||||||
* the {@link Directory} on top of which an {@link IndexWriter}
|
* the {@link Directory} on top of which an {@link IndexWriter}
|
||||||
* should be opened.
|
* should be opened.
|
||||||
* @param openMode
|
* @param config
|
||||||
* see {@link OpenMode}
|
* configuration for the internal index writer.
|
||||||
*/
|
*/
|
||||||
protected IndexWriter openIndexWriter(Directory directory, OpenMode openMode)
|
protected IndexWriter openIndexWriter(Directory directory, IndexWriterConfig config)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
// Make sure we use a MergePolicy which merges segments in-order and thus
|
|
||||||
// keeps the doc IDs ordered as well (this is crucial for the taxonomy
|
|
||||||
// index).
|
|
||||||
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40,
|
|
||||||
new KeywordAnalyzer()).setOpenMode(openMode).setMergePolicy(
|
|
||||||
new LogByteSizeMergePolicy());
|
|
||||||
return new IndexWriter(directory, config);
|
return new IndexWriter(directory, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Currently overridden by a unit test that verifies that every index we open
|
|
||||||
// is close()ed.
|
|
||||||
/**
|
/**
|
||||||
* Open an {@link IndexReader} from the {@link #indexWriter} member, by
|
* Create the {@link IndexWriterConfig} that would be used for opening the internal index writer.
|
||||||
* calling {@link IndexWriter#getReader()}. Extending classes can override
|
* <br>Extensions can configure the {@link IndexWriter} as they see fit,
|
||||||
|
* including setting a {@link org.apache.lucene.index.MergeScheduler merge-scheduler}, or
|
||||||
|
* {@link org.apache.lucene.index.IndexDeletionPolicy deletion-policy}, different RAM size
|
||||||
|
* etc.<br>
|
||||||
|
* <br><b>NOTE:</b> internal docids of the configured index must not be altered.
|
||||||
|
* For that, categories are never deleted from the taxonomy index.
|
||||||
|
* In addition, merge policy in effect must not merge none adjacent segments.
|
||||||
|
*
|
||||||
|
* @see #openIndexWriter(Directory, IndexWriterConfig)
|
||||||
|
*
|
||||||
|
* @param openMode see {@link OpenMode}
|
||||||
|
*/
|
||||||
|
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
|
||||||
|
// Make sure we use a MergePolicy which always merges adjacent segments and thus
|
||||||
|
// keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
|
||||||
|
return new IndexWriterConfig(Version.LUCENE_40,
|
||||||
|
new KeywordAnalyzer()).setOpenMode(openMode).setMergePolicy(
|
||||||
|
new LogByteSizeMergePolicy());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Currently overridden by a unit test that verifies that every index we open is close()ed.
|
||||||
|
/**
|
||||||
|
* Open an {@link IndexReader} from the internal {@link IndexWriter}, by
|
||||||
|
* calling {@link IndexReader#open(IndexWriter, boolean)}. Extending classes can override
|
||||||
* this method to return their own {@link IndexReader}.
|
* this method to return their own {@link IndexReader}.
|
||||||
*/
|
*/
|
||||||
protected IndexReader openReader() throws IOException {
|
protected IndexReader openReader() throws IOException {
|
||||||
|
|
|
@ -130,10 +130,13 @@ public class TestIndexClose extends LuceneTestCase {
|
||||||
return new InstrumentedIndexReader(super.openReader());
|
return new InstrumentedIndexReader(super.openReader());
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
protected IndexWriter openIndexWriter (Directory directory, OpenMode openMode) throws IOException {
|
protected IndexWriter openIndexWriter (Directory directory, IndexWriterConfig config) throws IOException {
|
||||||
return new InstrumentedIndexWriter(directory,
|
return new InstrumentedIndexWriter(directory, config);
|
||||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false))
|
}
|
||||||
.setOpenMode(openMode));
|
@Override
|
||||||
|
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
|
||||||
|
return newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false))
|
||||||
|
.setOpenMode(openMode).setMergePolicy(newLogMergePolicy());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue