LUCENE-1419: Add expert API to set custom indexing chain.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@704193 13f79535-47bb-0310-9956-ffa450edef68
Michael Busch 2008-10-13 18:12:11 +00:00
parent c027596aa2
commit f8e6c62a30
3 changed files with 115 additions and 39 deletions

CHANGES.txt

@@ -1,4 +1,4 @@
Lucene Change Log
$Id$
======================= Trunk (not yet released) =======================
@@ -7,6 +7,11 @@ Changes in runtime behavior
API Changes
1. LUCENE-1419: Add expert API to set custom indexing chain. This API is
package-protected for now, so we don't have to officially support it.
Yet, it will give us the possibility to try out different consumers in
the chain. (Michael Busch)
Bug fixes
1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()

src/java/org/apache/lucene/index/DocumentsWriter.java

@@ -184,6 +184,51 @@ final class DocumentsWriter {
this.next = next;
}
};
/**
* The IndexingChain must define the {@link #getChain(DocumentsWriter)} method
* which returns the DocConsumer that the DocumentsWriter calls to process the
* documents.
*/
abstract static class IndexingChain {
abstract DocConsumer getChain(DocumentsWriter documentsWriter);
}
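Because IndexingChain and DocConsumer are package-private, any custom chain has to be declared inside org.apache.lucene.index. A minimal hypothetical sketch (the class name is invented here, and it merely reuses the default chain rather than defining a new consumer):
package org.apache.lucene.index;
// Hypothetical, not part of this commit: the simplest custom chain
// just delegates to the default one. A real experiment would return
// a different DocConsumer implementation instead.
final class PassThroughIndexingChain extends IndexingChain {
  DocConsumer getChain(DocumentsWriter documentsWriter) {
    return DocumentsWriter.DefaultIndexingChain.getChain(documentsWriter);
  }
}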
static final IndexingChain DefaultIndexingChain = new IndexingChain() {
DocConsumer getChain(DocumentsWriter documentsWriter) {
/*
This is the current indexing chain:
DocConsumer / DocConsumerPerThread
--> code: DocFieldProcessor / DocFieldProcessorPerThread
--> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
--> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
--> code: DocInverter / DocInverterPerThread / DocInverterPerField
--> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
--> code: TermsHash / TermsHashPerThread / TermsHashPerField
--> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
--> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
--> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
--> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
--> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
--> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
*/
// Build up indexing chain:
final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter);
final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
final InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter,
new TermsHash(documentsWriter, false, termVectorsWriter, null));
final NormsWriter normsWriter = new NormsWriter();
final DocInverter docInverter = new DocInverter(termsHash, normsWriter);
final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(documentsWriter);
final DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter);
return new DocFieldProcessor(documentsWriter, docFieldConsumers);
}
};
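The nesting above is the whole composition story: TermsHash feeds FreqProxTermsWriter and chains to a secondary TermsHash wrapping TermVectorsTermsWriter, DocInverter pairs that inverted chain with NormsWriter, and DocFieldConsumers fans out to both the inverter and StoredFieldsWriter. As a hypothetical illustration of what the new hook enables (untested, and assuming a null secondary TermsHash is legal, as the innermost level of the default chain suggests), a chain that skips term vectors might look like:
// Hypothetical sketch, not part of this commit: the default wiring
// minus the secondary TermsHash that feeds TermVectorsTermsWriter.
static final IndexingChain NoTermVectorsChain = new IndexingChain() {
  DocConsumer getChain(DocumentsWriter documentsWriter) {
    final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
    final InvertedDocConsumer termsHash =
        new TermsHash(documentsWriter, true, freqProxWriter, null);
    final DocInverter docInverter =
        new DocInverter(termsHash, new NormsWriter());
    final StoredFieldsWriter fieldsWriter =
        new StoredFieldsWriter(documentsWriter);
    return new DocFieldProcessor(documentsWriter,
        new DocFieldConsumers(docInverter, fieldsWriter));
  }
};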
final DocConsumer consumer;
@@ -228,48 +273,23 @@ final class DocumentsWriter {
private boolean closed;
DocumentsWriter(Directory directory, IndexWriter writer) throws IOException {
DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain) throws IOException {
this.directory = directory;
this.writer = writer;
this.similarity = writer.getSimilarity();
flushedDocCount = writer.maxDoc();
/*
This is the current indexing chain:
DocConsumer / DocConsumerPerThread
--> code: DocFieldProcessor / DocFieldProcessorPerThread
--> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
--> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
--> code: DocInverter / DocInverterPerThread / DocInverterPerField
--> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
--> code: TermsHash / TermsHashPerThread / TermsHashPerField
--> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
--> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
--> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
--> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
--> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
--> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
*/
// TODO FI: this should be something the user can pass in
// Build up indexing chain:
final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(this);
final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
final InvertedDocConsumer termsHash = new TermsHash(this, true, freqProxWriter,
new TermsHash(this, false, termVectorsWriter, null));
final NormsWriter normsWriter = new NormsWriter();
final DocInverter docInverter = new DocInverter(termsHash, normsWriter);
final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(this);
final DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter);
consumer = docFieldProcessor = new DocFieldProcessor(this, docFieldConsumers);
consumer = indexingChain.getChain(this);
if (consumer instanceof DocFieldProcessor) {
docFieldProcessor = (DocFieldProcessor) consumer;
}
}
/** Returns true if any of the fields in the current
* buffered docs have omitTf==false */
boolean hasProx() {
return docFieldProcessor.fieldInfos.hasProx();
return (docFieldProcessor != null) ? docFieldProcessor.fieldInfos.hasProx()
: true;
}
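The true fallback is the conservative choice: with a custom chain that does not go through DocFieldProcessor, docFieldProcessor stays null and there are no FieldInfos to consult, so the writer assumes proximity data may be present rather than silently dropping it.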
/** If non-null, various details of indexing are printed

src/java/org/apache/lucene/index/IndexWriter.java

@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocumentsWriter.IndexingChain;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
@@ -994,7 +995,43 @@
throws CorruptIndexException, LockObtainFailedException, IOException {
init(d, a, create, false, deletionPolicy, false, mfl.getLimit());
}
/**
* Expert: constructs an IndexWriter with a custom {@link
* IndexDeletionPolicy} and {@link IndexingChain},
* for the index in <code>d</code>.
* Text will be analyzed with <code>a</code>. If
* <code>create</code> is true, then a new, empty index
* will be created in <code>d</code>, replacing the index
* already there, if any.
*
* <p><b>NOTE</b>: autoCommit (see <a
* href="#autoCommit">above</a>) is set to false with this
* constructor.
*
* @param d the index directory
* @param a the analyzer to use
* @param create <code>true</code> to create the index or overwrite
* the existing one; <code>false</code> to append to the existing
* index
* @param deletionPolicy see <a href="#deletionPolicy">above</a>
* @param indexingChain the {@link DocConsumer} chain to be used to
* process documents
* @param mfl whether or not to limit field lengths
* @throws CorruptIndexException if the index is corrupt
* @throws LockObtainFailedException if another writer
* has this index open (<code>write.lock</code> could not
* be obtained)
* @throws IOException if the directory cannot be read/written to, or
* if it does not exist and <code>create</code> is
* <code>false</code> or if there is any other low-level
* IO error
*/
IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain)
throws CorruptIndexException, LockObtainFailedException, IOException {
init(d, a, create, false, deletionPolicy, false, mfl.getLimit(), indexingChain);
}
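A sketch of how same-package test code might call the new constructor (hypothetical: the directory path and analyzer are illustrative, and this compiles only inside org.apache.lucene.index because the constructor is package-private):
// Assumed imports: org.apache.lucene.analysis.WhitespaceAnalyzer,
// org.apache.lucene.document.Document, org.apache.lucene.store.FSDirectory
Directory dir = FSDirectory.getDirectory("/tmp/chain-test");  // illustrative path
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
    new KeepOnlyLastCommitDeletionPolicy(),
    IndexWriter.MaxFieldLength.LIMITED,
    DocumentsWriter.DefaultIndexingChain);  // or a custom IndexingChain
writer.addDocument(new Document());
writer.close();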
/**
* Expert: constructs an IndexWriter with a custom {@link
* IndexDeletionPolicy}, for the index in <code>d</code>.
@@ -1028,16 +1065,30 @@
init(d, a, create, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH);
}
private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy,
boolean autoCommit, int maxFieldLength)
throws CorruptIndexException, LockObtainFailedException, IOException {
init(d, a, closeDir, deletionPolicy, autoCommit, maxFieldLength, DocumentsWriter.DefaultIndexingChain);
}
private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy,
boolean autoCommit, int maxFieldLength, IndexingChain indexingChain)
throws CorruptIndexException, LockObtainFailedException, IOException {
if (IndexReader.indexExists(d)) {
init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength);
init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain);
} else {
init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength);
init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain);
}
}
private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
private void init(Directory d, Analyzer a, final boolean create, boolean closeDir,
IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
throws CorruptIndexException, LockObtainFailedException, IOException {
init(d, a, create, closeDir, deletionPolicy, autoCommit, maxFieldLength, DocumentsWriter.DefaultIndexingChain);
}
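Design note: the pre-existing init overloads are kept and simply forward to the widened variants with DocumentsWriter.DefaultIndexingChain, so every public constructor behaves exactly as before; only the new package-private constructor can inject a custom chain.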
private void init(Directory d, Analyzer a, final boolean create, boolean closeDir,
IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength,
IndexingChain indexingChain)
throws CorruptIndexException, LockObtainFailedException, IOException {
this.closeDir = closeDir;
directory = d;
@@ -1084,7 +1135,7 @@
this.autoCommit = autoCommit;
setRollbackSegmentInfos(segmentInfos);
docWriter = new DocumentsWriter(directory, this);
docWriter = new DocumentsWriter(directory, this, indexingChain);
docWriter.setInfoStream(infoStream);
docWriter.setMaxFieldLength(maxFieldLength);