From f8e6c62a30ccb87b7c8467c761b0a55cef7f789f Mon Sep 17 00:00:00 2001 From: Michael Busch Date: Mon, 13 Oct 2008 18:12:11 +0000 Subject: [PATCH] LUCENE-1419: Add expert API to set custom indexing chain. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@704193 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 7 +- .../apache/lucene/index/DocumentsWriter.java | 84 ++++++++++++------- .../org/apache/lucene/index/IndexWriter.java | 63 ++++++++++++-- 3 files changed, 115 insertions(+), 39 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index c245765feef..1bd3a4e1eb2 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,4 @@ -Lucene Change Log +Lucene Change Log $Id$ ======================= Trunk (not yet released) ======================= @@ -7,6 +7,11 @@ Changes in runtime behavior API Changes +1. LUCENE-1419: Add expert API to set custom indexing chain. This API is + package-protected for now, so we don't have to officially support it. + Yet, it will give us the possibility to try out different consumers in + in the chain. (Michael Busch) + Bug fixes 1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals() diff --git a/src/java/org/apache/lucene/index/DocumentsWriter.java b/src/java/org/apache/lucene/index/DocumentsWriter.java index 83af61f4faf..6d0f3fc0481 100644 --- a/src/java/org/apache/lucene/index/DocumentsWriter.java +++ b/src/java/org/apache/lucene/index/DocumentsWriter.java @@ -184,6 +184,51 @@ final class DocumentsWriter { this.next = next; } }; + + /** + * The IndexingChain must define the {@link #getChain(DocumentsWriter)} method + * which returns the DocConsumer that the DocumentsWriter calls to process the + * documents. + */ + abstract static class IndexingChain { + abstract DocConsumer getChain(DocumentsWriter documentsWriter); + } + + static final IndexingChain DefaultIndexingChain = new IndexingChain() { + + DocConsumer getChain(DocumentsWriter documentsWriter) { + /* + This is the current indexing chain: + + DocConsumer / DocConsumerPerThread + --> code: DocFieldProcessor / DocFieldProcessorPerThread + --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField + --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField + --> code: DocInverter / DocInverterPerThread / DocInverterPerField + --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField + --> code: TermsHash / TermsHashPerThread / TermsHashPerField + --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField + --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField + --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField + --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField + --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField + --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField + */ + + // Build up indexing chain: + + final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter); + final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter(); + + final InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter, + new TermsHash(documentsWriter, false, termVectorsWriter, null)); + final NormsWriter normsWriter = new NormsWriter(); + final DocInverter docInverter = new DocInverter(termsHash, normsWriter); + final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(documentsWriter); + final DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter); + return new DocFieldProcessor(documentsWriter, docFieldConsumers); + } + }; final DocConsumer consumer; @@ -228,48 +273,23 @@ final class DocumentsWriter { private boolean closed; - DocumentsWriter(Directory directory, IndexWriter writer) throws IOException { + DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain) throws IOException { this.directory = directory; this.writer = writer; this.similarity = writer.getSimilarity(); flushedDocCount = writer.maxDoc(); - /* - This is the current indexing chain: - - DocConsumer / DocConsumerPerThread - --> code: DocFieldProcessor / DocFieldProcessorPerThread - --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField - --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField - --> code: DocInverter / DocInverterPerThread / DocInverterPerField - --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField - --> code: TermsHash / TermsHashPerThread / TermsHashPerField - --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField - --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField - --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField - --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField - --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField - --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField - */ - - // TODO FI: this should be something the user can pass in - // Build up indexing chain: - final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(this); - final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter(); - - final InvertedDocConsumer termsHash = new TermsHash(this, true, freqProxWriter, - new TermsHash(this, false, termVectorsWriter, null)); - final NormsWriter normsWriter = new NormsWriter(); - final DocInverter docInverter = new DocInverter(termsHash, normsWriter); - final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(this); - final DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter); - consumer = docFieldProcessor = new DocFieldProcessor(this, docFieldConsumers); + consumer = indexingChain.getChain(this); + if (consumer instanceof DocFieldProcessor) { + docFieldProcessor = (DocFieldProcessor) consumer; + } } /** Returns true if any of the fields in the current * buffered docs have omitTf==false */ boolean hasProx() { - return docFieldProcessor.fieldInfos.hasProx(); + return (docFieldProcessor != null) ? docFieldProcessor.fieldInfos.hasProx() + : true; } /** If non-null, various details of indexing are printed diff --git a/src/java/org/apache/lucene/index/IndexWriter.java b/src/java/org/apache/lucene/index/IndexWriter.java index 93fe6f234fc..44c69bbeb4b 100644 --- a/src/java/org/apache/lucene/index/IndexWriter.java +++ b/src/java/org/apache/lucene/index/IndexWriter.java @@ -19,6 +19,7 @@ package org.apache.lucene.index; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.index.DocumentsWriter.IndexingChain; import org.apache.lucene.search.Similarity; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; @@ -994,7 +995,43 @@ public class IndexWriter { throws CorruptIndexException, LockObtainFailedException, IOException { init(d, a, create, false, deletionPolicy, false, mfl.getLimit()); } - + + /** + * Expert: constructs an IndexWriter with a custom {@link + * IndexDeletionPolicy} and {@link IndexingChain}, + * for the index in d. + * Text will be analyzed with a. If + * create is true, then a new, empty index + * will be created in d, replacing the index + * already there, if any. + * + *

NOTE: autoCommit (see above) is set to false with this + * constructor. + * + * @param d the index directory + * @param a the analyzer to use + * @param create true to create the index or overwrite + * the existing one; false to append to the existing + * index + * @param deletionPolicy see above + * @param indexingChain the {@link DocConsumer} chain to be used to + * process documents + * @param mfl whether or not to limit field lengths + * @throws CorruptIndexException if the index is corrupt + * @throws LockObtainFailedException if another writer + * has this index open (write.lock could not + * be obtained) + * @throws IOException if the directory cannot be read/written to, or + * if it does not exist and create is + * false or if there is any other low-level + * IO error + */ + IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain) + throws CorruptIndexException, LockObtainFailedException, IOException { + init(d, a, create, false, deletionPolicy, false, mfl.getLimit(), indexingChain); + } + /** * Expert: constructs an IndexWriter with a custom {@link * IndexDeletionPolicy}, for the index in d. @@ -1028,16 +1065,30 @@ public class IndexWriter { init(d, a, create, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH); } - private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength) + private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, + boolean autoCommit, int maxFieldLength) + throws CorruptIndexException, LockObtainFailedException, IOException { + init(d, a, closeDir, deletionPolicy, autoCommit, maxFieldLength, DocumentsWriter.DefaultIndexingChain); + } + + private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, + boolean autoCommit, int maxFieldLength, IndexingChain indexingChain) throws CorruptIndexException, LockObtainFailedException, IOException { if (IndexReader.indexExists(d)) { - init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength); + init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain); } else { - init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength); + init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain); } } - private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength) + private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, + IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength) + throws CorruptIndexException, LockObtainFailedException, IOException { + init(d, a, create, closeDir, deletionPolicy, autoCommit, maxFieldLength, DocumentsWriter.DefaultIndexingChain); + } + private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, + IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength, + IndexingChain indexingChain) throws CorruptIndexException, LockObtainFailedException, IOException { this.closeDir = closeDir; directory = d; @@ -1084,7 +1135,7 @@ public class IndexWriter { this.autoCommit = autoCommit; setRollbackSegmentInfos(segmentInfos); - docWriter = new DocumentsWriter(directory, this); + docWriter = new DocumentsWriter(directory, this, indexingChain); docWriter.setInfoStream(infoStream); docWriter.setMaxFieldLength(maxFieldLength);