LUCENE-1419: Add expert API to set custom indexing chain.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@704193 13f79535-47bb-0310-9956-ffa450edef68
Michael Busch 2008-10-13 18:12:11 +00:00
parent c027596aa2
commit f8e6c62a30
3 changed files with 115 additions and 39 deletions

CHANGES.txt

@@ -1,4 +1,4 @@
Lucene Change Log
$Id$
======================= Trunk (not yet released) =======================
@@ -7,6 +7,11 @@ Changes in runtime behavior
API Changes
1. LUCENE-1419: Add expert API to set custom indexing chain. This API is
package-protected for now, so we don't have to officially support it.
Yet, it will give us the possibility to try out different consumers in
the chain. (Michael Busch)
Bug fixes
1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()

src/java/org/apache/lucene/index/DocumentsWriter.java

@@ -184,6 +184,51 @@ final class DocumentsWriter {
this.next = next;
}
};
/**
* The IndexingChain must define the {@link #getChain(DocumentsWriter)} method
* which returns the DocConsumer that the DocumentsWriter calls to process the
* documents.
*/
abstract static class IndexingChain {
abstract DocConsumer getChain(DocumentsWriter documentsWriter);
}
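Because IndexingChain and DocConsumer are package-private, any custom chain has to be declared inside org.apache.lucene.index. A minimal hypothetical sketch (the class name is invented here, and it merely reuses the default chain rather than defining a new consumer):
package org.apache.lucene.index;
// Hypothetical, not part of this commit: the simplest custom chain
// just delegates to the default one. A real experiment would return
// a different DocConsumer implementation instead.
final class PassThroughIndexingChain extends IndexingChain {
  DocConsumer getChain(DocumentsWriter documentsWriter) {
    return DocumentsWriter.DefaultIndexingChain.getChain(documentsWriter);
  }
}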
static final IndexingChain DefaultIndexingChain = new IndexingChain() {
DocConsumer getChain(DocumentsWriter documentsWriter) {
/*
This is the current indexing chain:
DocConsumer / DocConsumerPerThread
--> code: DocFieldProcessor / DocFieldProcessorPerThread
--> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
--> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
--> code: DocInverter / DocInverterPerThread / DocInverterPerField
--> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
--> code: TermsHash / TermsHashPerThread / TermsHashPerField
--> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
--> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
--> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
--> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
--> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
--> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
*/
// Build up indexing chain:
final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter);
final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
final InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter,
new TermsHash(documentsWriter, false, termVectorsWriter, null));
final NormsWriter normsWriter = new NormsWriter();
final DocInverter docInverter = new DocInverter(termsHash, normsWriter);
final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(documentsWriter);
final DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter);
return new DocFieldProcessor(documentsWriter, docFieldConsumers);
}
};
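The nesting above is the whole composition story: TermsHash feeds FreqProxTermsWriter and chains to a secondary TermsHash wrapping TermVectorsTermsWriter, DocInverter pairs that inverted chain with NormsWriter, and DocFieldConsumers fans out to both the inverter and StoredFieldsWriter. As a hypothetical illustration of what the new hook enables (untested, and assuming a null secondary TermsHash is legal, as the innermost level of the default chain suggests), a chain that skips term vectors might look like:
// Hypothetical sketch, not part of this commit: the default wiring
// minus the secondary TermsHash that feeds TermVectorsTermsWriter.
static final IndexingChain NoTermVectorsChain = new IndexingChain() {
  DocConsumer getChain(DocumentsWriter documentsWriter) {
    final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
    final InvertedDocConsumer termsHash =
        new TermsHash(documentsWriter, true, freqProxWriter, null);
    final DocInverter docInverter =
        new DocInverter(termsHash, new NormsWriter());
    final StoredFieldsWriter fieldsWriter =
        new StoredFieldsWriter(documentsWriter);
    return new DocFieldProcessor(documentsWriter,
        new DocFieldConsumers(docInverter, fieldsWriter));
  }
};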
final DocConsumer consumer;
@@ -228,48 +273,23 @@ final class DocumentsWriter {
private boolean closed;
DocumentsWriter(Directory directory, IndexWriter writer) throws IOException {
DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain) throws IOException {
this.directory = directory;
this.writer = writer;
this.similarity = writer.getSimilarity();
flushedDocCount = writer.maxDoc();
/*
This is the current indexing chain:
DocConsumer / DocConsumerPerThread
--> code: DocFieldProcessor / DocFieldProcessorPerThread
--> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
--> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
--> code: DocInverter / DocInverterPerThread / DocInverterPerField
--> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
--> code: TermsHash / TermsHashPerThread / TermsHashPerField
--> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
--> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
--> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
--> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
--> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
--> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
*/
// TODO FI: this should be something the user can pass in
// Build up indexing chain:
final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(this);
final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
final InvertedDocConsumer termsHash = new TermsHash(this, true, freqProxWriter,
new TermsHash(this, false, termVectorsWriter, null));
final NormsWriter normsWriter = new NormsWriter();
final DocInverter docInverter = new DocInverter(termsHash, normsWriter);
final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(this);
final DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter);
consumer = docFieldProcessor = new DocFieldProcessor(this, docFieldConsumers);
consumer = indexingChain.getChain(this);
if (consumer instanceof DocFieldProcessor) {
docFieldProcessor = (DocFieldProcessor) consumer;
}
}
/** Returns true if any of the fields in the current
* buffered docs have omitTf==false */
boolean hasProx() {
return docFieldProcessor.fieldInfos.hasProx();
return (docFieldProcessor != null) ? docFieldProcessor.fieldInfos.hasProx()
: true;
}
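The true fallback is the conservative choice: with a custom chain that does not go through DocFieldProcessor, docFieldProcessor stays null and there are no FieldInfos to consult, so the writer assumes proximity data may be present rather than silently dropping it.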
/** If non-null, various details of indexing are printed

src/java/org/apache/lucene/index/IndexWriter.java

@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocumentsWriter.IndexingChain;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
@@ -994,7 +995,43 @@
throws CorruptIndexException, LockObtainFailedException, IOException {
init(d, a, create, false, deletionPolicy, false, mfl.getLimit());
}
/**
* Expert: constructs an IndexWriter with a custom {@link
* IndexDeletionPolicy} and {@link IndexingChain},
* for the index in <code>d</code>.
* Text will be analyzed with <code>a</code>. If
* <code>create</code> is true, then a new, empty index
* will be created in <code>d</code>, replacing the index
* already there, if any.
*
* <p><b>NOTE</b>: autoCommit (see <a
* href="#autoCommit">above</a>) is set to false with this
* constructor.
*
* @param d the index directory
* @param a the analyzer to use
* @param create <code>true</code> to create the index or overwrite
* the existing one; <code>false</code> to append to the existing
* index
* @param deletionPolicy see <a href="#deletionPolicy">above</a>
* @param indexingChain the {@link DocConsumer} chain to be used to
* process documents
* @param mfl whether or not to limit field lengths
* @throws CorruptIndexException if the index is corrupt
* @throws LockObtainFailedException if another writer
* has this index open (<code>write.lock</code> could not
* be obtained)
* @throws IOException if the directory cannot be read/written to, or
* if it does not exist and <code>create</code> is
* <code>false</code> or if there is any other low-level
* IO error
*/
IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl, IndexingChain indexingChain)
throws CorruptIndexException, LockObtainFailedException, IOException {
init(d, a, create, false, deletionPolicy, false, mfl.getLimit(), indexingChain);
}
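A sketch of how same-package test code might call the new constructor (hypothetical: the directory path and analyzer are illustrative, and this compiles only inside org.apache.lucene.index because the constructor is package-private):
// Assumed imports: org.apache.lucene.analysis.WhitespaceAnalyzer,
// org.apache.lucene.document.Document, org.apache.lucene.store.FSDirectory
Directory dir = FSDirectory.getDirectory("/tmp/chain-test");  // illustrative path
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
    new KeepOnlyLastCommitDeletionPolicy(),
    IndexWriter.MaxFieldLength.LIMITED,
    DocumentsWriter.DefaultIndexingChain);  // or a custom IndexingChain
writer.addDocument(new Document());
writer.close();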
/**
* Expert: constructs an IndexWriter with a custom {@link
* IndexDeletionPolicy}, for the index in <code>d</code>.
@@ -1028,16 +1065,30 @@
init(d, a, create, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH);
}
private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy,
boolean autoCommit, int maxFieldLength)
throws CorruptIndexException, LockObtainFailedException, IOException {
init(d, a, closeDir, deletionPolicy, autoCommit, maxFieldLength, DocumentsWriter.DefaultIndexingChain);
}
private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy,
boolean autoCommit, int maxFieldLength, IndexingChain indexingChain)
throws CorruptIndexException, LockObtainFailedException, IOException {
if (IndexReader.indexExists(d)) {
init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength);
init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain);
} else {
init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength);
init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain);
}
}
private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
private void init(Directory d, Analyzer a, final boolean create, boolean closeDir,
IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
throws CorruptIndexException, LockObtainFailedException, IOException {
init(d, a, create, closeDir, deletionPolicy, autoCommit, maxFieldLength, DocumentsWriter.DefaultIndexingChain);
}
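Design note: the pre-existing init overloads are kept and simply forward to the widened variants with DocumentsWriter.DefaultIndexingChain, so every public constructor behaves exactly as before; only the new package-private constructor can inject a custom chain.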
private void init(Directory d, Analyzer a, final boolean create, boolean closeDir,
IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength,
IndexingChain indexingChain)
throws CorruptIndexException, LockObtainFailedException, IOException {
this.closeDir = closeDir;
directory = d;
@@ -1084,7 +1135,7 @@
this.autoCommit = autoCommit;
setRollbackSegmentInfos(segmentInfos);
docWriter = new DocumentsWriter(directory, this);
docWriter = new DocumentsWriter(directory, this, indexingChain);
docWriter.setInfoStream(infoStream);
docWriter.setMaxFieldLength(maxFieldLength);