mirror of https://github.com/apache/lucene.git
LUCENE-1419: Add expert API to set custom indexing chain.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@704193 13f79535-47bb-0310-9956-ffa450edef68
parent c027596aa2
commit f8e6c62a30
CHANGES.txt
@@ -1,4 +1,4 @@
Lucene Change Log

$Id$

======================= Trunk (not yet released) =======================

@@ -7,6 +7,11 @@ Changes in runtime behavior

API Changes

1. LUCENE-1419: Add expert API to set custom indexing chain. This API is
   package-protected for now, so we don't have to officially support it.
   Yet, it will give us the possibility to try out different consumers
   in the chain.  (Michael Busch)

Bug fixes

1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()
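
To make the LUCENE-1419 entry above concrete, here is a minimal sketch (not part of this commit) of what a custom chain can look like. Because IndexingChain is package-protected, such a class has to live in org.apache.lucene.index; the name PassThroughIndexingChain is hypothetical, and it simply delegates to the default chain that DocumentsWriter defines further below.

  package org.apache.lucene.index;

  // Hypothetical example: a chain that keeps the default wiring.  getChain()
  // is the single extension point; it must return the root DocConsumer that
  // DocumentsWriter feeds each document to.
  final class PassThroughIndexingChain extends DocumentsWriter.IndexingChain {
    DocConsumer getChain(DocumentsWriter documentsWriter) {
      // Delegate to the chain DocumentsWriter would otherwise build itself.
      return DocumentsWriter.DefaultIndexingChain.getChain(documentsWriter);
    }
  }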
src/java/org/apache/lucene/index/DocumentsWriter.java
@@ -184,6 +184,51 @@ final class DocumentsWriter {
      this.next = next;
    }
  };

  /**
   * The IndexingChain must define the {@link #getChain(DocumentsWriter)} method
   * which returns the DocConsumer that the DocumentsWriter calls to process the
   * documents.
   */
  abstract static class IndexingChain {
    abstract DocConsumer getChain(DocumentsWriter documentsWriter);
  }

  static final IndexingChain DefaultIndexingChain = new IndexingChain() {

    DocConsumer getChain(DocumentsWriter documentsWriter) {
      /*
      This is the current indexing chain:

      DocConsumer / DocConsumerPerThread
        --> code: DocFieldProcessor / DocFieldProcessorPerThread
          --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
            --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
              --> code: DocInverter / DocInverterPerThread / DocInverterPerField
                --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
                  --> code: TermsHash / TermsHashPerThread / TermsHashPerField
                    --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
                      --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
                      --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
                --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
                  --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
              --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
      */

      // Build up indexing chain:

      final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter);
      final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();

      final InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter,
                                                          new TermsHash(documentsWriter, false, termVectorsWriter, null));
      final NormsWriter normsWriter = new NormsWriter();
      final DocInverter docInverter = new DocInverter(termsHash, normsWriter);
      final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(documentsWriter);
      final DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter);
      return new DocFieldProcessor(documentsWriter, docFieldConsumers);
    }
  };

  final DocConsumer consumer;
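
As a sketch of the "different consumers" the CHANGES entry mentions, the hypothetical chain below reuses the components wired up in DefaultIndexingChain but drops the term-vectors branch. It would sit next to DefaultIndexingChain inside DocumentsWriter; that the primary TermsHash accepts null as its secondary chain is an assumption of this sketch, not something this commit establishes.

  // Hypothetical alternative chain: postings, norms and stored fields, but no
  // term vectors.  Assumes TermsHash tolerates a null secondary TermsHash.
  static final IndexingChain NoTermVectorsChain = new IndexingChain() {
    DocConsumer getChain(DocumentsWriter documentsWriter) {
      final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
      final InvertedDocConsumer termsHash =
        new TermsHash(documentsWriter, true, freqProxWriter, null);
      final DocInverter docInverter = new DocInverter(termsHash, new NormsWriter());
      final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(documentsWriter);
      return new DocFieldProcessor(documentsWriter,
                                   new DocFieldConsumers(docInverter, fieldsWriter));
    }
  };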
@@ -228,48 +273,23 @@ final class DocumentsWriter {

  private boolean closed;

  DocumentsWriter(Directory directory, IndexWriter writer) throws IOException {
  DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain) throws IOException {
    this.directory = directory;
    this.writer = writer;
    this.similarity = writer.getSimilarity();
    flushedDocCount = writer.maxDoc();

    /*
    This is the current indexing chain:

    DocConsumer / DocConsumerPerThread
      --> code: DocFieldProcessor / DocFieldProcessorPerThread
        --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
          --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
            --> code: DocInverter / DocInverterPerThread / DocInverterPerField
              --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
                --> code: TermsHash / TermsHashPerThread / TermsHashPerField
                  --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
                    --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
                    --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
              --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
                --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
            --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
    */

    // TODO FI: this should be something the user can pass in
    // Build up indexing chain:
    final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(this);
    final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();

    final InvertedDocConsumer termsHash = new TermsHash(this, true, freqProxWriter,
                                                        new TermsHash(this, false, termVectorsWriter, null));
    final NormsWriter normsWriter = new NormsWriter();
    final DocInverter docInverter = new DocInverter(termsHash, normsWriter);
    final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(this);
    final DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter);
    consumer = docFieldProcessor = new DocFieldProcessor(this, docFieldConsumers);
    consumer = indexingChain.getChain(this);
    if (consumer instanceof DocFieldProcessor) {
      docFieldProcessor = (DocFieldProcessor) consumer;
    }
  }

  /** Returns true if any of the fields in the current
   *  buffered docs have omitTf==false */
  boolean hasProx() {
    return docFieldProcessor.fieldInfos.hasProx();
    return (docFieldProcessor != null) ? docFieldProcessor.fieldInfos.hasProx()
                                       : true;
  }

  /** If non-null, various details of indexing are printed
src/java/org/apache/lucene/index/IndexWriter.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocumentsWriter.IndexingChain;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
@@ -994,7 +995,43 @@ public class IndexWriter {
      throws CorruptIndexException, LockObtainFailedException, IOException {
    init(d, a, create, false, deletionPolicy, false, mfl.getLimit());
  }

  /**
   * Expert: constructs an IndexWriter with a custom {@link
   * IndexDeletionPolicy} and {@link IndexingChain},
   * for the index in <code>d</code>.
   * Text will be analyzed with <code>a</code>. If
   * <code>create</code> is true, then a new, empty index
   * will be created in <code>d</code>, replacing the index
   * already there, if any.
   *
   * <p><b>NOTE</b>: autoCommit (see <a
   * href="#autoCommit">above</a>) is set to false with this
   * constructor.
   *
   * @param d the index directory
   * @param a the analyzer to use
   * @param create <code>true</code> to create the index or overwrite
   *  the existing one; <code>false</code> to append to the existing
   *  index
   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
   * @param indexingChain the {@link DocConsumer} chain to be used to
   *  process documents
   * @param mfl whether or not to limit field lengths
   * @throws CorruptIndexException if the index is corrupt
   * @throws LockObtainFailedException if another writer
   *  has this index open (<code>write.lock</code> could not
   *  be obtained)
   * @throws IOException if the directory cannot be read/written to, or
   *  if it does not exist and <code>create</code> is
   *  <code>false</code> or if there is any other low-level
   *  IO error
   */
  IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain)
      throws CorruptIndexException, LockObtainFailedException, IOException {
    init(d, a, create, false, deletionPolicy, false, mfl.getLimit(), indexingChain);
  }

  /**
   * Expert: constructs an IndexWriter with a custom {@link
   * IndexDeletionPolicy}, for the index in <code>d</code>.
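
A usage sketch for the new package-private constructor above (again not part of this commit): the caller has to live in org.apache.lucene.index, and the directory path, analyzer, deletion policy and MaxFieldLength.LIMITED constant below are plausible stand-ins from the same code base, not values this commit prescribes.

  // Hypothetical caller, e.g. a test in org.apache.lucene.index, inside a
  // method declared to throw IOException.
  // (imports assumed: org.apache.lucene.analysis.WhitespaceAnalyzer,
  //  org.apache.lucene.document.Document, org.apache.lucene.store.Directory,
  //  org.apache.lucene.store.FSDirectory)
  Directory dir = FSDirectory.getDirectory("/tmp/custom-chain-index");
  IndexingChain chain = DocumentsWriter.DefaultIndexingChain;  // or a custom chain
  IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                       new KeepOnlyLastCommitDeletionPolicy(),
                                       IndexWriter.MaxFieldLength.LIMITED, chain);
  writer.addDocument(new Document());
  writer.close();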
@@ -1028,16 +1065,30 @@ public class IndexWriter {
    init(d, a, create, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH);
  }

  private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
  private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy,
                    boolean autoCommit, int maxFieldLength)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    init(d, a, closeDir, deletionPolicy, autoCommit, maxFieldLength, DocumentsWriter.DefaultIndexingChain);
  }

  private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy,
                    boolean autoCommit, int maxFieldLength, IndexingChain indexingChain)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    if (IndexReader.indexExists(d)) {
      init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength);
      init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain);
    } else {
      init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength);
      init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain);
    }
  }

  private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
  private void init(Directory d, Analyzer a, final boolean create, boolean closeDir,
                    IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    init(d, a, create, closeDir, deletionPolicy, autoCommit, maxFieldLength, DocumentsWriter.DefaultIndexingChain);
  }

  private void init(Directory d, Analyzer a, final boolean create, boolean closeDir,
                    IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength,
                    IndexingChain indexingChain)
    throws CorruptIndexException, LockObtainFailedException, IOException {
    this.closeDir = closeDir;
    directory = d;
@@ -1084,7 +1135,7 @@ public class IndexWriter {
    this.autoCommit = autoCommit;
    setRollbackSegmentInfos(segmentInfos);

    docWriter = new DocumentsWriter(directory, this);
    docWriter = new DocumentsWriter(directory, this, indexingChain);
    docWriter.setInfoStream(infoStream);
    docWriter.setMaxFieldLength(maxFieldLength);