LUCENE-3596: DirectoryTaxonomyWriter extensions can now set internal index writer config attributes such as info stream

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206996 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doron Cohen 2011-11-28 07:05:44 +00:00
parent 4d4ec8b8c2
commit ec660c7997
2 changed files with 51 additions and 25 deletions
modules/facet/src
java/org/apache/lucene/facet/taxonomy/directory
test/org/apache/lucene/facet/taxonomy/directory

View File

@ -31,6 +31,7 @@ import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
@ -191,7 +192,13 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
taxoIndexCreateTime = Long.toString(System.nanoTime()); taxoIndexCreateTime = Long.toString(System.nanoTime());
} }
indexWriter = openIndexWriter(directory, openMode); IndexWriterConfig config = createIndexWriterConfig(openMode);
indexWriter = openIndexWriter(directory, config);
// verify (to some extent) that merge policy in effect would preserve category docids
assert !(indexWriter.getConfig().getMergePolicy() instanceof TieredMergePolicy) :
"for preserving category docids, merging none-adjacent segments is not allowed";
reader = null; reader = null;
FieldType ft = new FieldType(TextField.TYPE_UNSTORED); FieldType ft = new FieldType(TextField.TYPE_UNSTORED);
@ -225,37 +232,53 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
} }
/** /**
* A hook for extensions of this class to provide their own * Open internal index writer, which contains the taxonomy data.
* {@link IndexWriter} implementation or instance. Extending classes can * <p>
* instantiate and configure the {@link IndexWriter} as they see fit, * Extensions may provide their own {@link IndexWriter} implementation or instance.
* including setting a {@link org.apache.lucene.index.MergeScheduler}, or * <br><b>NOTE:</b> the instance this method returns will be closed upon calling
* {@link org.apache.lucene.index.IndexDeletionPolicy}, different RAM size
* etc.<br>
* <b>NOTE:</b> the instance this method returns will be closed upon calling
* to {@link #close()}. * to {@link #close()}.
* <br><b>NOTE:</b> the merge policy in effect must not merge non-adjacent segments. See
* comment in {@link #createIndexWriterConfig(IndexWriterConfig.OpenMode)} for the logic behind this.
*
* @see #createIndexWriterConfig(IndexWriterConfig.OpenMode)
* *
* @param directory * @param directory
* the {@link Directory} on top of which an {@link IndexWriter} * the {@link Directory} on top of which an {@link IndexWriter}
* should be opened. * should be opened.
* @param openMode * @param config
* see {@link OpenMode} * configuration for the internal index writer.
*/ */
protected IndexWriter openIndexWriter(Directory directory, OpenMode openMode) protected IndexWriter openIndexWriter(Directory directory, IndexWriterConfig config)
throws IOException { throws IOException {
// Make sure we use a MergePolicy which merges segments in-order and thus
// keeps the doc IDs ordered as well (this is crucial for the taxonomy
// index).
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40,
new KeywordAnalyzer()).setOpenMode(openMode).setMergePolicy(
new LogByteSizeMergePolicy());
return new IndexWriter(directory, config); return new IndexWriter(directory, config);
} }
// Currently overridden by a unit test that verifies that every index we open
// is close()ed.
/** /**
* Open an {@link IndexReader} from the {@link #indexWriter} member, by * Create the {@link IndexWriterConfig} that would be used for opening the internal index writer.
* calling {@link IndexWriter#getReader()}. Extending classes can override * <br>Extensions can configure the {@link IndexWriter} as they see fit,
* including setting a {@link org.apache.lucene.index.MergeScheduler merge-scheduler}, or
* {@link org.apache.lucene.index.IndexDeletionPolicy deletion-policy}, different RAM size
* etc.<br>
* <br><b>NOTE:</b> internal docids of the configured index must not be altered.
* To that end, categories are never deleted from the taxonomy index.
* In addition, the merge policy in effect must not merge non-adjacent segments.
*
* @see #openIndexWriter(Directory, IndexWriterConfig)
*
* @param openMode see {@link OpenMode}
*/
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
// Make sure we use a MergePolicy which always merges adjacent segments and thus
// keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
return new IndexWriterConfig(Version.LUCENE_40,
new KeywordAnalyzer()).setOpenMode(openMode).setMergePolicy(
new LogByteSizeMergePolicy());
}
// Currently overridden by a unit test that verifies that every index we open is close()ed.
/**
* Open an {@link IndexReader} from the internal {@link IndexWriter}, by
* calling {@link IndexReader#open(IndexWriter, boolean)}. Extending classes can override
* this method to return their own {@link IndexReader}. * this method to return their own {@link IndexReader}.
*/ */
protected IndexReader openReader() throws IOException { protected IndexReader openReader() throws IOException {

View File

@ -130,10 +130,13 @@ public class TestIndexClose extends LuceneTestCase {
return new InstrumentedIndexReader(super.openReader()); return new InstrumentedIndexReader(super.openReader());
} }
@Override @Override
protected IndexWriter openIndexWriter (Directory directory, OpenMode openMode) throws IOException { protected IndexWriter openIndexWriter (Directory directory, IndexWriterConfig config) throws IOException {
return new InstrumentedIndexWriter(directory, return new InstrumentedIndexWriter(directory, config);
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false)) }
.setOpenMode(openMode)); @Override
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
return newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false))
.setOpenMode(openMode).setMergePolicy(newLogMergePolicy());
} }
} }