LUCENE-550: Added RAMDirectory alternative as a contrib. Similar to MemoryIndex, but can hold more than one document

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@636745 13f79535-47bb-0310-9956-ffa450edef68
Grant Ingersoll 2008-03-13 12:34:30 +00:00
parent b36d9f9be3
commit f4cc6e2269
18 changed files with 2903 additions and 1 deletion
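For orientation, here is a minimal usage sketch of the new contrib (illustrative only, not one of the committed files). It assumes the 2.3-era core API (StandardAnalyzer, Field.Index.TOKENIZED, the Hits search API) plus the indexWriterFactory/indexReaderFactory methods defined on InstantiatedIndex below:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.instantiated.InstantiatedIndex;
import org.apache.lucene.store.instantiated.InstantiatedIndexWriter;

public class InstantiatedIndexExample {
  public static void main(String[] args) throws Exception {
    // an empty all-in-memory index, populated through its own writer
    InstantiatedIndex index = new InstantiatedIndex();
    InstantiatedIndexWriter writer = index.indexWriterFactory(new StandardAnalyzer(), true);
    Document doc = new Document();
    doc.add(new Field("body", "hello instantiated world", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);
    writer.close(); // close() commits the buffered document

    // searched through the ordinary IndexSearcher/IndexReader API
    IndexSearcher searcher = new IndexSearcher(index.indexReaderFactory());
    Hits hits = searcher.search(new TermQuery(new Term("body", "hello")));
    System.out.println(hits.length() + " hit(s)");
    searcher.close();
  }
}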

View File

@@ -458,7 +458,10 @@ New features
    to consider token positions when creating PhraseQuery
    and MultiPhraseQuery. Disabled by default (so by default
    the query parser ignores position increments).
    (Doron Cohen)

13. LUCENE-550: Added InstantiatedIndex implementation. Experimental
    Index store similar to MemoryIndex but allows for multiple documents
    in memory. (Karl Wettin via Grant Ingersoll)

Optimizations

View File

@@ -0,0 +1,32 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="instantiated" default="default">
<description>
InstantiatedIndex, an alternative RAM store.
</description>
<property name="javac.source" value="1.5" />
<property name="javac.target" value="1.5" />
<import file="../contrib-build.xml"/>
</project>

Binary file not shown.


View File

@@ -0,0 +1,50 @@
<?xml version="1.0" encoding="UTF-8"?><umlet_diagram><help_text>//Uncomment the following line to change the fontsize:
//fontsize=14
//Welcome to UMLet!
// *Double-click on UML elements to add them to the diagram.
// *Edit element properties by modifying the text in this panel.
// *Edit the files in the 'palettes' directory to store your own element palettes.
// *Press Del or Backspace to remove elements from the diagram.
// *Hold down Ctrl key to select multiple elements.
// *Press c to copy the UML diagram to the system clipboard.
// * This text will be stored with each diagram. Feel free to use the area for notes.
</help_text><element><type>com.umlet.element.base.Class</type><coordinates><x>310</x><y>540</y><w>310</w><h>110</h></coordinates><panel_attributes>bg=#eeeeee
fg=#000000
InstantiatedTermDocumentInformation
--
+payloads:byte[][]
+termPositions:int[]
+termOffsets:TermVectorOffsetInfo[]
+indexFromTerm:int
--</panel_attributes><additional_attributes></additional_attributes></element><element><type>com.umlet.element.base.Relation</type><coordinates><x>460</x><y>380</y><w>40</w><h>180</h></coordinates><panel_attributes>lt=.</panel_attributes><additional_attributes>20;20;20;160</additional_attributes></element><element><type>com.umlet.element.base.Relation</type><coordinates><x>460</x><y>40</y><w>61</w><h>160</h></coordinates><panel_attributes>lt=&lt;-
q2=field
m1=0..1</panel_attributes><additional_attributes>30;20;30;140</additional_attributes></element><element><type>com.umlet.element.base.Class</type><coordinates><x>430</x><y>30</y><w>120</w><h>30</h></coordinates><panel_attributes>bg=#eeeeee
fg=#000099
_norm: byte[][]_</panel_attributes><additional_attributes></additional_attributes></element><element><type>com.umlet.element.base.Class</type><coordinates><x>80</x><y>390</y><w>100</w><h>30</h></coordinates><panel_attributes>bg=#eeeeee
fg=#000099
Term</panel_attributes><additional_attributes></additional_attributes></element><element><type>com.umlet.element.base.Relation</type><coordinates><x>770</x><y>380</y><w>120</w><h>40</h></coordinates><panel_attributes>lt=-&gt;
m2=1</panel_attributes><additional_attributes>20;20;100;20</additional_attributes></element><element><type>com.umlet.element.base.Class</type><coordinates><x>870</x><y>390</y><w>100</w><h>30</h></coordinates><panel_attributes>bg=#eeeeee
fg=#000099
Document</panel_attributes><additional_attributes></additional_attributes></element><element><type>com.umlet.element.base.Class</type><coordinates><x>590</x><y>370</y><w>200</w><h>60</h></coordinates><panel_attributes>bg=#eeeeee
fg=#000000
InstantiatedDocument
--
+documentNumber:int
--</panel_attributes><additional_attributes></additional_attributes></element><element><type>com.umlet.element.base.Relation</type><coordinates><x>520</x><y>190</y><w>170</w><h>200</h></coordinates><panel_attributes>lt=&lt;-
m1=0..*
&lt;&lt;ordered&gt;&gt;</panel_attributes><additional_attributes>150;180;20;20</additional_attributes></element><element><type>com.umlet.element.base.Relation</type><coordinates><x>290</x><y>190</y><w>140</w><h>220</h></coordinates><panel_attributes>lt=&lt;-
m1=0..*
&lt;&lt;ordered&gt;&gt;</panel_attributes><additional_attributes>20;200;120;20</additional_attributes></element><element><type>com.umlet.element.base.Class</type><coordinates><x>380</x><y>180</y><w>200</w><h>30</h></coordinates><panel_attributes>bg=#eeeeee
fg=#000000
InstantiatedIndex</panel_attributes><additional_attributes></additional_attributes></element><element><type>com.umlet.element.base.Relation</type><coordinates><x>160</x><y>380</y><w>110</w><h>40</h></coordinates><panel_attributes>lt=-&gt;
m2=1</panel_attributes><additional_attributes>90;20;20;20</additional_attributes></element><element><type>com.umlet.element.base.Class</type><coordinates><x>250</x><y>390</y><w>160</w><h>30</h></coordinates><panel_attributes>bg=#eeeeee
fg=#000000
InstantiatedTerm
</panel_attributes><additional_attributes></additional_attributes></element><element><type>com.umlet.element.base.Relation</type><coordinates><x>380</x><y>190</y><w>146</w><h>220</h></coordinates><panel_attributes>lt=&lt;-
q2=field, term
m1=0..1</panel_attributes><additional_attributes>20;200;100;20</additional_attributes></element><element><type>com.umlet.element.base.Relation</type><coordinates><x>390</x><y>380</y><w>220</w><h>40</h></coordinates><panel_attributes>lt=-
q2=field
m2=0..*
m1=0..*</panel_attributes><additional_attributes>20;20;200;20</additional_attributes></element></umlet_diagram>

View File

@@ -0,0 +1,50 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-contrib</artifactId>
<version>@version@</version>
</parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-instantiated</artifactId>
<name>Lucene InstantiatedIndex</name>
<version>@version@</version>
<description>InstantiatedIndex, alternative RAM store for small corpora.</description>
<packaging>jar</packaging>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.5</source>
<target>1.5</target>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,79 @@
package org.apache.lucene.store.instantiated;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.Document;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
/**
* A document in the instantiated index object graph, optionally coupled to the vector space view.
*
* @see org.apache.lucene.document.Document
*/
public class InstantiatedDocument
implements Serializable {
private static final long serialVersionUID = 1L;
private Document document;
public InstantiatedDocument() {
this.document = new Document();
}
public InstantiatedDocument(Document document) {
this.document = document;
}
/** this is the unsafe index order document number. */
private Integer documentNumber;
/** this is the term vector space view */
private Map<String /*field name*/, List<InstantiatedTermDocumentInformation>> vectorSpace;
/**
* @return position of document in the index.
*/
public Integer getDocumentNumber() {
return documentNumber;
}
void setDocumentNumber(Integer documentNumber) {
this.documentNumber = documentNumber;
}
public Map</*field name*/ String, List<InstantiatedTermDocumentInformation>> getVectorSpace() {
return vectorSpace;
}
public void setVectorSpace(Map</*field name*/ String, List<InstantiatedTermDocumentInformation>> vectorSpace) {
this.vectorSpace = vectorSpace;
}
public Document getDocument() {
return document;
}
public String toString() {
return document.toString();
}
}
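A short illustrative fragment (not from the commit; assumes org.apache.lucene.document.Field is also imported) showing why getDocumentNumber() returns an Integer rather than an int: the number stays unassigned until an InstantiatedIndexWriter commits the document.

Document doc = new Document();
doc.add(new Field("title", "hello", Field.Store.YES, Field.Index.TOKENIZED));
InstantiatedDocument wrapped = new InstantiatedDocument(doc);
// not yet bound to an index; a writer assigns the number at commit time
assert wrapped.getDocumentNumber() == null;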

View File

@@ -0,0 +1,274 @@
package org.apache.lucene.store.instantiated;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
import java.io.IOException;
import java.io.Serializable;
import java.util.*;
/**
* Represented as a coupled graph of class instances, this
* all-in-memory index store implementation delivers search
* results up to 100 times faster than the file-centric RAMDirectory
* at the cost of greater RAM consumption.
*
* WARNING: This contrib is experimental and the APIs may change without warning.
*
* There are no read or write locks in this store.
* {@link InstantiatedIndexReader#isCurrent()} always returns true,
* and an {@link org.apache.lucene.store.instantiated.InstantiatedIndexWriter}
* will update instances of the object graph in memory
* at the same time as a searcher is reading from it.
*
* Consider using InstantiatedIndex as if it were immutable.
*/
public class InstantiatedIndex
implements Serializable {
private static final long serialVersionUID = 1L;
private long version = System.currentTimeMillis();
private InstantiatedDocument[] documentsByNumber;
/** todo: this should be a BitSet */
private Set<Integer> deletedDocuments;
private Map<String, Map<String, InstantiatedTerm>> termsByFieldAndText;
private InstantiatedTerm[] orderedTerms;
private Map<String, byte[]> normsByFieldNameAndDocumentNumber;
/**
* Creates an empty instantiated index for you to fill with data using an {@link org.apache.lucene.store.instantiated.InstantiatedIndexWriter}.
*/
public InstantiatedIndex() {
initialize();
}
void initialize() {
// todo: clear index without losing memory (uncouple stuff)
termsByFieldAndText = new HashMap<String, Map<String, InstantiatedTerm>>();
orderedTerms = new InstantiatedTerm[0];
documentsByNumber = new InstantiatedDocument[0];
normsByFieldNameAndDocumentNumber = new HashMap<String, byte[]>();
deletedDocuments = new HashSet<Integer>();
}
/**
* Creates a new instantiated index that looks just like the index in a specific state as represented by a reader.
*
* @param sourceIndexReader the source index this new instantiated index will be copied from.
* @throws IOException if the source index is not optimized, or when accessing the source.
*/
public InstantiatedIndex(IndexReader sourceIndexReader) throws IOException {
this(sourceIndexReader, null);
}
/**
* Creates a new instantiated index that looks just like the index in a specific state as represented by a reader.
*
* @param sourceIndexReader the source index this new instantiated index will be copied from.
* @param fields fields to be added, or null for all
* @throws IOException if the source index is not optimized, or when accessing the source.
*/
public InstantiatedIndex(IndexReader sourceIndexReader, Set<String> fields) throws IOException {
if (!sourceIndexReader.isOptimized()) {
throw new IOException("Source index is not optimized.");
}
Collection<String> allFieldNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.ALL);
initialize();
documentsByNumber = new InstantiatedDocument[sourceIndexReader.numDocs()];
// create documents
for (int i = 0; i < sourceIndexReader.numDocs(); i++) {
if (!sourceIndexReader.isDeleted(i)) {
InstantiatedDocument document = new InstantiatedDocument();
// copy stored fields from source reader
Document sourceDocument = sourceIndexReader.document(i);
for (Field field : (List<Field>) sourceDocument.getFields()) {
if (fields == null || fields.contains(field.name())) {
document.getDocument().add(field);
}
}
document.setDocumentNumber(i);
documentsByNumber[i] = document;
for (Field field : (List<Field>) document.getDocument().getFields()) {
if (fields == null || fields.contains(field.name())) {
if (field.isTermVectorStored()) {
if (document.getVectorSpace() == null) {
document.setVectorSpace(new HashMap<String, List<InstantiatedTermDocumentInformation>>());
}
document.getVectorSpace().put(field.name(), new ArrayList<InstantiatedTermDocumentInformation>());
}
}
}
}
}
// create norms
for (String fieldName : allFieldNames) {
if (fields == null || fields.contains(fieldName)) {
getNormsByFieldNameAndDocumentNumber().put(fieldName, sourceIndexReader.norms(fieldName));
}
}
// create terms
for (String fieldName : allFieldNames) {
if (fields == null || fields.contains(fieldName)) {
getTermsByFieldAndText().put(fieldName, new HashMap<String, InstantiatedTerm>(5000));
}
}
List<InstantiatedTerm> terms = new ArrayList<InstantiatedTerm>(5000 * getTermsByFieldAndText().size());
TermEnum termEnum = sourceIndexReader.terms();
while (termEnum.next()) {
if (fields == null || fields.contains(termEnum.term().field())) { // todo skipto if not using field
InstantiatedTerm instantiatedTerm = new InstantiatedTerm(termEnum.term().field(), termEnum.term().text());
getTermsByFieldAndText().get(termEnum.term().field()).put(termEnum.term().text(), instantiatedTerm);
instantiatedTerm.setTermIndex(terms.size());
terms.add(instantiatedTerm);
instantiatedTerm.setAssociatedDocuments(new InstantiatedTermDocumentInformation[termEnum.docFreq()]);
}
}
termEnum.close();
orderedTerms = terms.toArray(new InstantiatedTerm[terms.size()]);
// create term-document informations
for (InstantiatedTerm term : orderedTerms) {
TermPositions termPositions = sourceIndexReader.termPositions(term.getTerm());
int position = 0;
while (termPositions.next()) {
InstantiatedDocument document = documentsByNumber[termPositions.doc()];
byte[][] payloads = new byte[termPositions.freq()][];
int[] positions = new int[termPositions.freq()];
for (int i = 0; i < termPositions.freq(); i++) {
positions[i] = termPositions.nextPosition();
if (termPositions.isPayloadAvailable()) {
payloads[i] = new byte[termPositions.getPayloadLength()];
termPositions.getPayload(payloads[i], 0);
}
}
InstantiatedTermDocumentInformation termDocumentInformation = new InstantiatedTermDocumentInformation(term, document, positions, payloads);
term.getAssociatedDocuments()[position++] = termDocumentInformation;
if (document.getVectorSpace() != null
&& document.getVectorSpace().containsKey(term.field())) {
document.getVectorSpace().get(term.field()).add(termDocumentInformation);
}
// termDocumentInformation.setIndexFromTerm(indexFromTerm++);
}
}
// load offsets to term-document informations
for (InstantiatedDocument document : getDocumentsByNumber()) {
for (Field field : (List<Field>) document.getDocument().getFields()) {
if (field.isTermVectorStored() && field.isStoreOffsetWithTermVector()) {
TermPositionVector termPositionVector = (TermPositionVector) sourceIndexReader.getTermFreqVector(document.getDocumentNumber(), field.name());
if (termPositionVector != null) {
for (int i = 0; i < termPositionVector.getTerms().length; i++) {
String token = termPositionVector.getTerms()[i];
InstantiatedTerm term = findTerm(field.name(), token);
InstantiatedTermDocumentInformation termDocumentInformation = term.getAssociatedDocument(document.getDocumentNumber());
termDocumentInformation.setTermOffsets(termPositionVector.getOffsets(i));
}
}
}
}
}
}
public InstantiatedIndexWriter indexWriterFactory(Analyzer analyzer, boolean create) throws IOException {
return new InstantiatedIndexWriter(this, analyzer, create);
}
public InstantiatedIndexReader indexReaderFactory() throws IOException {
return new InstantiatedIndexReader(this);
}
public void close() throws IOException {
// todo: decouple everything
}
InstantiatedTerm findTerm(Term term) {
return findTerm(term.field(), term.text());
}
InstantiatedTerm findTerm(String field, String text) {
Map<String, InstantiatedTerm> termsByField = termsByFieldAndText.get(field);
if (termsByField == null) {
return null;
} else {
return termsByField.get(text);
}
}
public Map<String, Map<String, InstantiatedTerm>> getTermsByFieldAndText() {
return termsByFieldAndText;
}
public InstantiatedTerm[] getOrderedTerms() {
return orderedTerms;
}
public InstantiatedDocument[] getDocumentsByNumber() {
return documentsByNumber;
}
public Map<String, byte[]> getNormsByFieldNameAndDocumentNumber() {
return normsByFieldNameAndDocumentNumber;
}
void setNormsByFieldNameAndDocumentNumber(Map<String, byte[]> normsByFieldNameAndDocumentNumber) {
this.normsByFieldNameAndDocumentNumber = normsByFieldNameAndDocumentNumber;
}
public Set<Integer> getDeletedDocuments() {
return deletedDocuments;
}
void setOrderedTerms(InstantiatedTerm[] orderedTerms) {
this.orderedTerms = orderedTerms;
}
void setDocumentsByNumber(InstantiatedDocument[] documentsByNumber) {
this.documentsByNumber = documentsByNumber;
}
public long getVersion() {
return version;
}
void setVersion(long version) {
this.version = version;
}
}
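A sketch of the copy constructor in use (illustrative, not part of the commit; the filesystem path is hypothetical). As shown above, the source reader must be optimized, or the constructor throws an IOException:

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.instantiated.InstantiatedIndex;

public class LoadIntoMemoryExample {
  public static void main(String[] args) throws Exception {
    IndexReader source = IndexReader.open("/path/to/optimized/index"); // hypothetical location
    InstantiatedIndex index = new InstantiatedIndex(source); // pass a field set to copy only a subset
    source.close();
    // the in-memory copy is searched like any other index
    IndexSearcher searcher = new IndexSearcher(index.indexReaderFactory());
    searcher.close();
  }
}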

View File

@@ -0,0 +1,326 @@
package org.apache.lucene.store.instantiated;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import java.io.IOException;
import java.util.*;
/**
* An InstantiatedIndexReader is not a snapshot in time;
* it is completely in sync with the latest commit to the store!
*
* Consider using InstantiatedIndex as if it were immutable.
*/
public class InstantiatedIndexReader
extends IndexReader {
private final InstantiatedIndex index;
public InstantiatedIndexReader(InstantiatedIndex index) {
super();
this.index = index;
}
/**
* @return always true.
*/
public boolean isOptimized() {
return true;
}
/**
* An InstantiatedIndexReader is not a snapshot in time;
* it is completely in sync with the latest commit to the store!
*
* @return output from {@link InstantiatedIndex#getVersion()} of the associated instantiated index.
*/
public long getVersion() {
return index.getVersion();
}
public Directory directory() {
throw new UnsupportedOperationException();
}
/**
* An InstantiatedIndexReader is always current!
*
* Check whether this IndexReader is still using the
* current (i.e., most recently committed) version of the
* index. If a writer has committed any changes to the
* index since this reader was opened, this will return
* <code>false</code>, in which case you must open a new
* IndexReader in order to see the changes. See the
* description of the <a href="IndexWriter.html#autoCommit"><code>autoCommit</code></a>
* flag which controls when the {@link IndexWriter}
* actually commits changes to the index.
*
* @return always true
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public boolean isCurrent() throws IOException {
return true;
}
public InstantiatedIndex getIndex() {
return index;
}
private Set<InstantiatedDocument> deletedDocuments = new HashSet<InstantiatedDocument>();
private Set<Integer> deletedDocumentNumbers = new HashSet<Integer>();
private Map<String, List<NormUpdate>> updatedNormsByFieldNameAndDocumentNumber = null;
private class NormUpdate {
private int doc;
private byte value;
public NormUpdate(int doc, byte value) {
this.doc = doc;
this.value = value;
}
}
public int numDocs() {
return getIndex().getDocumentsByNumber().length - index.getDeletedDocuments().size() - deletedDocuments.size();
}
public int maxDoc() {
return getIndex().getDocumentsByNumber().length;
}
public boolean isDeleted(int n) {
return getIndex().getDeletedDocuments().contains(n) || deletedDocumentNumbers.contains(n);
}
public boolean hasDeletions() {
return getIndex().getDeletedDocuments().size() > 0 || deletedDocumentNumbers.size() > 0;
}
protected void doDelete(int docNum) throws IOException {
if (!getIndex().getDeletedDocuments().contains(docNum)) {
if (deletedDocumentNumbers.add(docNum)) {
deletedDocuments.add(getIndex().getDocumentsByNumber()[docNum]);
}
}
}
protected void doUndeleteAll() throws IOException {
deletedDocumentNumbers.clear();
deletedDocuments.clear();
}
protected void doCommit() throws IOException {
// todo: read/write lock
boolean updated = false;
// 1. update norms
if (updatedNormsByFieldNameAndDocumentNumber != null) {
for (Map.Entry<String, List<NormUpdate>> e : updatedNormsByFieldNameAndDocumentNumber.entrySet()) {
byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(e.getKey());
for (NormUpdate normUpdate : e.getValue()) {
norms[normUpdate.doc] = normUpdate.value;
}
}
updatedNormsByFieldNameAndDocumentNumber = null;
updated = true;
}
// 2. remove deleted documents
if (deletedDocumentNumbers.size() > 0) {
for (Integer doc : deletedDocumentNumbers) {
getIndex().getDeletedDocuments().add(doc);
}
deletedDocumentNumbers.clear();
deletedDocuments.clear();
updated = true;
}
// todo unlock read/writelock
}
protected void doClose() throws IOException {
// ignored
}
public Collection getFieldNames(FieldOption fldOption) {
if (fldOption != FieldOption.ALL) {
throw new IllegalArgumentException("Only FieldOption.ALL implemented."); // todo
}
return new ArrayList<String>(getIndex().getTermsByFieldAndText().keySet());
}
/**
* This implementation ignores the field selector! All fields are always returned
*
* Get the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup> position.
*
* @param n Get the document at the <code>n</code><sup>th</sup> position
* @param fieldSelector ignored
* @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*
* @see org.apache.lucene.document.Fieldable
* @see org.apache.lucene.document.FieldSelector
* @see org.apache.lucene.document.SetBasedFieldSelector
* @see org.apache.lucene.document.LoadFirstFieldSelector
*/
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
return document(n);
}
public Document document(int n) throws IOException {
if ((deletedDocumentNumbers != null
&& deletedDocumentNumbers.contains(n))
||
(getIndex().getDeletedDocuments() != null
&& getIndex().getDeletedDocuments().contains(n))) {
return null;
}
return getIndex().getDocumentsByNumber()[n].getDocument();
}
/**
* Never modify the returned array directly; it holds the true values, unless norms have been updated.
*/
public byte[] norms(String field) throws IOException {
byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
if (updatedNormsByFieldNameAndDocumentNumber != null) {
norms = norms.clone();
List<NormUpdate> updated = updatedNormsByFieldNameAndDocumentNumber.get(field);
if (updated != null) {
for (NormUpdate normUpdate : updated) {
norms[normUpdate.doc] = normUpdate.value;
}
}
}
return norms;
}
public void norms(String field, byte[] bytes, int offset) throws IOException {
byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
System.arraycopy(norms, 0, bytes, offset, norms.length);
}
protected void doSetNorm(int doc, String field, byte value) throws IOException {
if (updatedNormsByFieldNameAndDocumentNumber == null) {
updatedNormsByFieldNameAndDocumentNumber = new HashMap<String, List<NormUpdate>>(getIndex().getNormsByFieldNameAndDocumentNumber().size());
}
List<NormUpdate> list = updatedNormsByFieldNameAndDocumentNumber.get(field);
if (list == null) {
list = new LinkedList<NormUpdate>();
updatedNormsByFieldNameAndDocumentNumber.put(field, list);
}
list.add(new NormUpdate(doc, value));
}
public int docFreq(Term t) throws IOException {
InstantiatedTerm term = getIndex().findTerm(t);
if (term == null) {
return 0;
} else {
return term.getAssociatedDocuments().length;
}
}
public TermEnum terms() throws IOException {
return new InstantiatedTermEnum(this);
}
public TermEnum terms(Term t) throws IOException {
InstantiatedTerm it = getIndex().findTerm(t);
if (it != null) {
return new InstantiatedTermEnum(this, it.getTermIndex());
} else {
int startPos = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator);
if (startPos < 0) {
startPos = -1 - startPos;
}
return new InstantiatedTermEnum(this, startPos);
}
}
public TermDocs termDocs() throws IOException {
return new InstantiatedTermDocs(this);
}
public TermPositions termPositions() throws IOException {
return new InstantiatedTermPositions(this);
}
public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
if (doc.getVectorSpace() == null) {
return null;
}
TermFreqVector[] ret = new TermFreqVector[doc.getVectorSpace().size()];
Iterator<String> it = doc.getVectorSpace().keySet().iterator();
for (int i = 0; i < ret.length; i++) {
ret[i] = new InstantiatedTermPositionVector(getIndex().getDocumentsByNumber()[docNumber], it.next());
}
return ret;
}
public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
if (doc.getVectorSpace() == null
|| doc.getVectorSpace().get(field) == null) {
return null;
} else {
return new InstantiatedTermPositionVector(doc, field);
}
}
public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
if (doc.getVectorSpace() != null
    && doc.getVectorSpace().get(field) != null) {
List<InstantiatedTermDocumentInformation> tv = doc.getVectorSpace().get(field);
mapper.setExpectations(field, tv.size(), true, true);
for (InstantiatedTermDocumentInformation tdi : tv) {
mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
}
}
}
public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
for (Map.Entry<String, List<InstantiatedTermDocumentInformation>> e : doc.getVectorSpace().entrySet()) {
mapper.setExpectations(e.getKey(), e.getValue().size(), true, true);
for (InstantiatedTermDocumentInformation tdi : e.getValue()) {
mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
}
}
}
}
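An illustrative walk over this reader's term dictionary (not part of the commit), using only the TermEnum contract implemented above:

import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.store.instantiated.InstantiatedIndex;
import org.apache.lucene.store.instantiated.InstantiatedIndexReader;

public class DumpTermsExample {
  public static void main(String[] args) throws Exception {
    InstantiatedIndex index = new InstantiatedIndex(); // assumed populated elsewhere
    InstantiatedIndexReader reader = index.indexReaderFactory();
    TermEnum terms = reader.terms();
    while (terms.next()) {
      Term term = terms.term();
      System.out.println(term.field() + ":" + term.text() + " docFreq=" + terms.docFreq());
    }
    terms.close();
    reader.close();
  }
}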

View File

@@ -0,0 +1,681 @@
package org.apache.lucene.store.instantiated;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermVectorOffsetInfo;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
import java.io.IOException;
import java.io.PrintStream;
import java.io.StringReader;
import java.util.*;
/**
* This class, similar to {@link org.apache.lucene.index.IndexWriter}, has no locking mechanism.
*
* An {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader} navigates
* the same instances in memory as this writer is updating, so searchers active while
* you are committing are bound to throw exceptions.
*
* Consider using InstantiatedIndex as if it were immutable.
*
* @see org.apache.lucene.index.IndexWriter
*/
public class InstantiatedIndexWriter {
private PrintStream infoStream = null;
private int maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
private final InstantiatedIndex index;
private final Analyzer analyzer;
private Similarity similarity = Similarity.getDefault(); // how to normalize;
private transient Set<String> fieldNameBuffer;
/**
* linked to ensure chronological order
*/
private Map<InstantiatedDocument, Map<FieldSetting, Map<String /*text*/, TermDocumentInformationFactory>>> termDocumentInformationFactoryByDocument = new LinkedHashMap<InstantiatedDocument, Map<FieldSetting, Map<String /*text*/, TermDocumentInformationFactory>>>(2000);
private Set<InstantiatedDocument> unflushedDocuments = new HashSet<InstantiatedDocument>();
public InstantiatedIndexWriter(InstantiatedIndex index) throws IOException {
this(index, null);
}
public InstantiatedIndexWriter(InstantiatedIndex index, Analyzer analyzer) throws IOException {
this(index, analyzer, false);
}
public InstantiatedIndexWriter(InstantiatedIndex index, Analyzer analyzer, boolean create) throws IOException {
this.index = index;
this.analyzer = analyzer;
fieldNameBuffer = new HashSet<String>();
if (create) {
this.index.initialize();
}
}
private int mergeFactor = 2500;
/**
* The sweet spot for this implementation is somewhere around 2,500 buffered documents of about 2K characters of text each.
* <p/>
* Benchmark output:
* <pre>
* ------------> Report sum by Prefix (MAddDocs) and Round (8 about 8 out of 160153)
* Operation round mrg buf cmpnd runCnt recsPerRun rec/s elapsedSec avgUsedMem avgTotalMem
* MAddDocs_20000 0 10 10 true 1 20000 81,4 245,68 200 325 152 268 156 928
* MAddDocs_20000 - 1 1000 10 true - - 1 - - 20000 - - 494,1 - - 40,47 - 247 119 072 - 347 025 408
* MAddDocs_20000 2 10 100 true 1 20000 104,8 190,81 233 895 552 363 720 704
* MAddDocs_20000 - 3 2000 100 true - - 1 - - 20000 - - 527,2 - - 37,94 - 266 136 448 - 378 273 792
* MAddDocs_20000 4 10 10 false 1 20000 103,2 193,75 222 089 792 378 273 792
* MAddDocs_20000 - 5 3000 10 false - - 1 - - 20000 - - 545,2 - - 36,69 - 237 917 152 - 378 273 792
* MAddDocs_20000 6 10 100 false 1 20000 102,7 194,67 237 018 976 378 273 792
* MAddDocs_20000 - 7 4000 100 false - - 1 - - 20000 - - 535,8 - - 37,33 - 309 680 640 - 501 968 896
* </pre>
*
* @see org.apache.lucene.index.IndexWriter#setMergeFactor(int)
*/
public void setMergeFactor(int mergeFactor) {
this.mergeFactor = mergeFactor;
}
/**
* @see org.apache.lucene.index.IndexWriter#getMergeFactor()
*/
public int getMergeFactor() {
return mergeFactor;
}
/**
* If non-null, information about merges and a message when
* maxFieldLength is reached will be printed to this.
*/
public void setInfoStream(PrintStream infoStream) {
this.infoStream = infoStream;
}
public void abort() throws IOException {
// what not
}
public void addIndexes(IndexReader[] readers) {
throw new RuntimeException("Not implemented");
}
public PrintStream getInfoStream() {
return infoStream;
}
/**
* Flushes all changes to an index and closes all associated files.
*/
public void close() throws IOException {
commit();
}
/**
* Returns the number of documents currently in this index.
*/
public int docCount() {
// todo: not certain. see http://www.nabble.com/IndexWriter.docCount-tf3128882.html#a8669483
return index.getDocumentsByNumber().length /* - index.getDeletedDocuments().size() */ + unflushedDocuments.size();
}
/**
* Locks the index and commits the buffered documents.
*/
public void commit() throws IOException {
// todo write lock, unless held by caller
boolean orderedTermsDirty = false;
Set<InstantiatedTerm> dirtyTerms = new HashSet<InstantiatedTerm>(1000);
InstantiatedDocument[] documentsByNumber = new InstantiatedDocument[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()];
System.arraycopy(index.getDocumentsByNumber(), 0, documentsByNumber, 0, index.getDocumentsByNumber().length);
int documentNumber = index.getDocumentsByNumber().length;
List<InstantiatedTerm> orderedTerms = new ArrayList<InstantiatedTerm>(index.getOrderedTerms().length + 5000);
for (InstantiatedTerm instantiatedTerm : index.getOrderedTerms()) {
orderedTerms.add(instantiatedTerm);
}
// update norm array with fake values for new documents
Map<String, byte[]> normsByFieldNameAndDocumentNumber = new HashMap<String, byte[]>(index.getTermsByFieldAndText().size());
Set<String> fieldNames = new HashSet<String>(20);
fieldNames.addAll(index.getNormsByFieldNameAndDocumentNumber().keySet());
fieldNames.addAll(fieldNameBuffer);
for (String field : index.getTermsByFieldAndText().keySet()) {
byte[] norms = new byte[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()];
byte[] oldNorms = index.getNormsByFieldNameAndDocumentNumber().get(field);
if (oldNorms != null) {
System.arraycopy(oldNorms, 0, norms, 0, oldNorms.length);
Arrays.fill(norms, oldNorms.length, norms.length, DefaultSimilarity.encodeNorm(1.0f));
} else {
Arrays.fill(norms, 0, norms.length, DefaultSimilarity.encodeNorm(1.0f));
}
normsByFieldNameAndDocumentNumber.put(field, norms);
fieldNames.remove(field);
}
for (String field : fieldNames) {
//System.out.println(field);
byte[] norms = new byte[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()];
Arrays.fill(norms, 0, norms.length, DefaultSimilarity.encodeNorm(1.0f));
normsByFieldNameAndDocumentNumber.put(field, norms);
}
fieldNames.clear();
index.setNormsByFieldNameAndDocumentNumber(normsByFieldNameAndDocumentNumber);
for (Map.Entry<InstantiatedDocument, Map<FieldSetting, Map<String /*text*/, TermDocumentInformationFactory>>> eDocumentTermDocInfoByTermTextAndField : termDocumentInformationFactoryByDocument.entrySet()) {
InstantiatedDocument document = eDocumentTermDocInfoByTermTextAndField.getKey();
// assign document number
document.setDocumentNumber(documentNumber++);
documentsByNumber[document.getDocumentNumber()] = document;
// set norms, prepare document and create optimized size collections.
int numFieldsWithTermVectorsInDocument = 0;
int termsInDocument = 0;
for (Map.Entry<FieldSetting, Map<String /*text*/, TermDocumentInformationFactory>> eFieldTermDocInfoFactoriesByTermText : eDocumentTermDocInfoByTermTextAndField.getValue().entrySet()) {
if (eFieldTermDocInfoFactoriesByTermText.getKey().storeTermVector) {
numFieldsWithTermVectorsInDocument += eFieldTermDocInfoFactoriesByTermText.getValue().size();
}
termsInDocument += eFieldTermDocInfoFactoriesByTermText.getValue().size();
if (eFieldTermDocInfoFactoriesByTermText.getKey().isIndexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) {
float norm = eFieldTermDocInfoFactoriesByTermText.getKey().boost;
norm *= document.getDocument().getBoost();
norm *= similarity.lengthNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
normsByFieldNameAndDocumentNumber.get(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName)[document.getDocumentNumber()] = Similarity.encodeNorm(norm);
}
}
/** used for term vectors only, I think. */
Map<InstantiatedTerm, InstantiatedTermDocumentInformation> informationByTermOfCurrentDocument = new HashMap<InstantiatedTerm, InstantiatedTermDocumentInformation>(termsInDocument);
Map<String, FieldSetting> documentFieldSettingsByFieldName = new HashMap<String, FieldSetting>(eDocumentTermDocInfoByTermTextAndField.getValue().size());
// terms...
for (Map.Entry<FieldSetting, Map<String /*text*/, TermDocumentInformationFactory>> eFieldSetting_TermDocInfoFactoriesByTermText : eDocumentTermDocInfoByTermTextAndField.getValue().entrySet()) {
documentFieldSettingsByFieldName.put(eFieldSetting_TermDocInfoFactoriesByTermText.getKey().fieldName, eFieldSetting_TermDocInfoFactoriesByTermText.getKey());
// find or create term
for (Map.Entry<String /*text*/, TermDocumentInformationFactory> eTermText_TermDocInfoFactory : eFieldSetting_TermDocInfoFactoriesByTermText.getValue().entrySet()) {
// get term..
InstantiatedTerm term;
Map<String, InstantiatedTerm> termsByText = index.getTermsByFieldAndText().get(eFieldSetting_TermDocInfoFactoriesByTermText.getKey().fieldName);
if (termsByText == null) {
termsByText = new HashMap<String, InstantiatedTerm>(1000);
index.getTermsByFieldAndText().put(eFieldSetting_TermDocInfoFactoriesByTermText.getKey().fieldName, termsByText);
term = new InstantiatedTerm(eFieldSetting_TermDocInfoFactoriesByTermText.getKey().fieldName, eTermText_TermDocInfoFactory.getKey());
termsByText.put(eTermText_TermDocInfoFactory.getKey(), term);
int pos = Collections.binarySearch(orderedTerms, term, InstantiatedTerm.comparator);
pos = -1 - pos;
orderedTerms.add(pos, term);
orderedTermsDirty = true;
} else {
term = termsByText.get(eTermText_TermDocInfoFactory.getKey());
if (term == null) {
term = new InstantiatedTerm(eFieldSetting_TermDocInfoFactoriesByTermText.getKey().fieldName, eTermText_TermDocInfoFactory.getKey());
termsByText.put(eTermText_TermDocInfoFactory.getKey(), term);
int pos = Collections.binarySearch(orderedTerms, term, InstantiatedTerm.comparator);
pos = -1 - pos;
orderedTerms.add(pos, term);
orderedTermsDirty = true;
}
}
// create associated term document information
//
// [Term]-- {0..*} | {0..* ordered} --(field)[Document]
//
// |
// [TermDocumentInformation]
int[] positions = new int[eTermText_TermDocInfoFactory.getValue().termPositions.size()];
for (int i = 0; i < positions.length; i++) {
positions[i] = eTermText_TermDocInfoFactory.getValue().termPositions.get(i);
}
byte[][] payloads = new byte[eTermText_TermDocInfoFactory.getValue().payloads.size()][];
for (int i = 0; i < payloads.length; i++) {
payloads[i] = eTermText_TermDocInfoFactory.getValue().payloads.get(i);
}
// couple
InstantiatedTermDocumentInformation info = new InstantiatedTermDocumentInformation(term, document, /*eTermText_TermDocInfoFactory.getValue().termFrequency,*/ positions, payloads);
// todo optimize, this should be cached and updated to array in batches rather than appending the array once for every position!
InstantiatedTermDocumentInformation[] associatedDocuments;
if (term.getAssociatedDocuments() != null) {
associatedDocuments = new InstantiatedTermDocumentInformation[term.getAssociatedDocuments().length + 1];
System.arraycopy(term.getAssociatedDocuments(), 0, associatedDocuments, 0, term.getAssociatedDocuments().length);
} else {
associatedDocuments = new InstantiatedTermDocumentInformation[1];
}
associatedDocuments[associatedDocuments.length - 1] = info;
term.setAssociatedDocuments(associatedDocuments);
// todo optimize, only if term vector?
informationByTermOfCurrentDocument.put(term, info);
dirtyTerms.add(term);
}
// term vector offsets
if (eFieldSetting_TermDocInfoFactoriesByTermText.getKey().storeOffsetWithTermVector) {
for (Map.Entry<InstantiatedTerm, InstantiatedTermDocumentInformation> e : informationByTermOfCurrentDocument.entrySet()) {
if (eFieldSetting_TermDocInfoFactoriesByTermText.getKey().fieldName.equals(e.getKey().field())) {
TermDocumentInformationFactory factory = eFieldSetting_TermDocInfoFactoriesByTermText.getValue().get(e.getKey().text());
e.getValue().setTermOffsets(factory.termOffsets.toArray(new TermVectorOffsetInfo[factory.termOffsets.size()]));
}
}
}
}
Map<String, List<InstantiatedTermDocumentInformation>> termDocumentInformationsByField = new HashMap<String, List<InstantiatedTermDocumentInformation>>();
for (Map.Entry<InstantiatedTerm, InstantiatedTermDocumentInformation> eTerm_TermDocumentInformation : informationByTermOfCurrentDocument.entrySet()) {
List<InstantiatedTermDocumentInformation> termDocumentInformations = termDocumentInformationsByField.get(eTerm_TermDocumentInformation.getKey().field());
if (termDocumentInformations == null) {
termDocumentInformations = new ArrayList<InstantiatedTermDocumentInformation>();
termDocumentInformationsByField.put(eTerm_TermDocumentInformation.getKey().field(), termDocumentInformations);
}
termDocumentInformations.add(eTerm_TermDocumentInformation.getValue());
}
for (Map.Entry<String, List<InstantiatedTermDocumentInformation>> eField_TermDocInfos : termDocumentInformationsByField.entrySet()) {
Collections.sort(eField_TermDocInfos.getValue(), new Comparator<InstantiatedTermDocumentInformation>() {
public int compare(InstantiatedTermDocumentInformation instantiatedTermDocumentInformation, InstantiatedTermDocumentInformation instantiatedTermDocumentInformation1) {
return instantiatedTermDocumentInformation.getTerm().getTerm().compareTo(instantiatedTermDocumentInformation1.getTerm().getTerm());
}
});
// add term vector
if (documentFieldSettingsByFieldName.get(eField_TermDocInfos.getKey()).storeTermVector) {
if (document.getVectorSpace() == null) {
document.setVectorSpace(new HashMap<String, List<InstantiatedTermDocumentInformation>>(documentFieldSettingsByFieldName.size()));
}
document.getVectorSpace().put(eField_TermDocInfos.getKey(), eField_TermDocInfos.getValue());
}
}
}
// order document informations in dirty terms
for (InstantiatedTerm term : dirtyTerms) {
// todo optimize, I believe this is useless, as the natural order is document number?
Arrays.sort(term.getAssociatedDocuments(), InstantiatedTermDocumentInformation.documentNumberComparator);
// // update association class reference for speedy skipTo()
// for (int i = 0; i < term.getAssociatedDocuments().length; i++) {
// term.getAssociatedDocuments()[i].setIndexFromTerm(i);
// }
}
// flush to writer
index.setDocumentsByNumber(documentsByNumber);
index.setOrderedTerms(orderedTerms.toArray(new InstantiatedTerm[orderedTerms.size()]));
// set term index
if (orderedTermsDirty) {
// todo optimize, only update from start position
for (int i = 0; i < index.getOrderedTerms().length; i++) {
index.getOrderedTerms()[i].setTermIndex(i);
}
}
// remove deleted documents
IndexReader indexDeleter = index.indexReaderFactory();
if (unflushedDeletions.size() > 0) {
for (Term term : unflushedDeletions) {
indexDeleter.deleteDocuments(term);
}
unflushedDeletions.clear();
}
// all done, clear buffers
unflushedDocuments.clear();
termDocumentInformationFactoryByDocument.clear();
fieldNameBuffer.clear();
index.setVersion(System.currentTimeMillis());
// todo unlock
indexDeleter.close();
}
/**
* Adds a document to this index. If the document contains more than
* {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
* discarded.
*/
public void addDocument(Document doc) throws IOException {
addDocument(doc, getAnalyzer());
}
/**
* Adds a document to this index, using the provided analyzer instead of the
* value of {@link #getAnalyzer()}. If the document contains more than
* {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
* discarded.
*
* @param doc
* @param analyzer
* @throws IOException
*/
public void addDocument(Document doc, Analyzer analyzer) throws IOException {
addDocument(new InstantiatedDocument(doc), analyzer);
}
/**
* Tokenizes a document and adds it to the buffer.
* Try to do all calculations in this method rather than in commit, as this is a non-locking method.
* Remember, this index implementation expects unlimited memory for maximum speed.
*
* @param document
* @param analyzer
* @throws IOException
*/
protected void addDocument(InstantiatedDocument document, Analyzer analyzer) throws IOException {
if (document.getDocumentNumber() != null) {
throw new RuntimeException("Document number already set! Are you trying to add a document that is already bound to this or another index?");
}
// todo: write lock
// normalize settings per field name in document
Map<String /* field name */, FieldSetting> fieldSettingsByFieldName = new HashMap<String, FieldSetting>();
for (Field field : (List<Field>) document.getDocument().getFields()) {
FieldSetting fieldSettings = fieldSettingsByFieldName.get(field.name());
if (fieldSettings == null) {
fieldSettings = new FieldSetting();
fieldSettings.fieldName = field.name().intern();
fieldSettingsByFieldName.put(fieldSettings.fieldName, fieldSettings);
fieldNameBuffer.add(fieldSettings.fieldName);
}
// todo: fixme: multiple fields with the same name do not mean field boost += more boost.
fieldSettings.boost *= field.getBoost();
//fieldSettings.dimensions++;
// once fieldSettings, always fieldSettings.
if (field.getOmitNorms() != fieldSettings.omitNorms) {
fieldSettings.omitNorms = true;
}
if (field.isIndexed() != fieldSettings.isIndexed) {
fieldSettings.isIndexed = true;
}
if (field.isTokenized() != fieldSettings.isTokenized) {
fieldSettings.isTokenized = true;
}
if (field.isCompressed() != fieldSettings.isCompressed) {
fieldSettings.isCompressed = true;
}
if (field.isStored() != fieldSettings.isStored) {
fieldSettings.isStored = true;
}
if (field.isBinary() != fieldSettings.isBinary) {
fieldSettings.isBinary = true;
}
if (field.isTermVectorStored() != fieldSettings.storeTermVector) {
fieldSettings.storeTermVector = true;
}
if (field.isStorePositionWithTermVector() != fieldSettings.storePositionWithTermVector) {
fieldSettings.storePositionWithTermVector = true;
}
if (field.isStoreOffsetWithTermVector() != fieldSettings.storeOffsetWithTermVector) {
fieldSettings.storeOffsetWithTermVector = true;
}
}
Map<Field, LinkedList<Token>> tokensByField = new LinkedHashMap<Field, LinkedList<Token>>(20);
// tokenize indexed fields.
for (Iterator<Field> it = (Iterator<Field>) document.getDocument().getFields().iterator(); it.hasNext();) {
Field field = it.next();
FieldSetting fieldSettings = fieldSettingsByFieldName.get(field.name());
if (field.isIndexed()) {
LinkedList<Token> tokens = new LinkedList<Token>();
tokensByField.put(field, tokens);
if (field.isTokenized()) {
int termCounter = 0;
final TokenStream tokenStream;
// todo readerValue(), binaryValue()
if (field.tokenStreamValue() != null) {
tokenStream = field.tokenStreamValue();
} else {
tokenStream = analyzer.tokenStream(field.name(), new StringReader(field.stringValue()));
}
Token next = tokenStream.next();
while (next != null) {
next.setTermText(next.termText().intern()); // todo: not sure this needs to be interned?
tokens.add(next); // the vector will be built on commit.
next = tokenStream.next();
fieldSettings.fieldLength++;
if (fieldSettings.fieldLength > maxFieldLength) {
break;
}
}
} else {
// untokenized
tokens.add(new Token(field.stringValue().intern(), 0, field.stringValue().length(), "untokenized"));
fieldSettings.fieldLength++;
}
}
if (!field.isStored()) {
it.remove();
}
}
Map<FieldSetting, Map<String /*text*/, TermDocumentInformationFactory>> termDocumentInformationFactoryByTermTextAndFieldSetting = new HashMap<FieldSetting, Map<String /*text*/, TermDocumentInformationFactory>>();
termDocumentInformationFactoryByDocument.put(document, termDocumentInformationFactoryByTermTextAndFieldSetting);
// build term vector, term positions and term offsets
for (Map.Entry<Field, LinkedList<Token>> eField_Tokens : tokensByField.entrySet()) {
FieldSetting fieldSettings = fieldSettingsByFieldName.get(eField_Tokens.getKey().name());
Map<String, TermDocumentInformationFactory> termDocumentInformationFactoryByTermText = termDocumentInformationFactoryByTermTextAndFieldSetting.get(fieldSettingsByFieldName.get(eField_Tokens.getKey().name()));
if (termDocumentInformationFactoryByTermText == null) {
termDocumentInformationFactoryByTermText = new HashMap<String /*text*/, TermDocumentInformationFactory>();
termDocumentInformationFactoryByTermTextAndFieldSetting.put(fieldSettingsByFieldName.get(eField_Tokens.getKey().name()), termDocumentInformationFactoryByTermText);
}
int lastOffset = 0;
// for each new field, move positions a bunch.
if (fieldSettings.position > 0) {
// todo what if no analyzer set, multiple fields with same name and index without tokenization?
fieldSettings.position += analyzer.getPositionIncrementGap(fieldSettings.fieldName);
}
for (Token token : eField_Tokens.getValue()) {
TermDocumentInformationFactory termDocumentInformationFactory = termDocumentInformationFactoryByTermText.get(token.termText());
if (termDocumentInformationFactory == null) {
termDocumentInformationFactory = new TermDocumentInformationFactory();
termDocumentInformationFactoryByTermText.put(token.termText(), termDocumentInformationFactory);
}
//termDocumentInformationFactory.termFrequency++;
fieldSettings.position += (token.getPositionIncrement() - 1);
termDocumentInformationFactory.termPositions.add(fieldSettings.position++);
if (token.getPayload() != null && token.getPayload().length() > 0) {
termDocumentInformationFactory.payloads.add(token.getPayload().toByteArray());
} else {
termDocumentInformationFactory.payloads.add(null);
}
if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {
termDocumentInformationFactory.termOffsets.add(new TermVectorOffsetInfo(fieldSettings.offset + token.startOffset(), fieldSettings.offset + token.endOffset()));
lastOffset = fieldSettings.offset + token.endOffset();
}
}
if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {
fieldSettings.offset = lastOffset + 1;
}
}
unflushedDocuments.add(document);
// if too many documents in buffer, commit.
if (unflushedDocuments.size() >= getMergeFactor()) {
commit(/*lock*/);
}
// todo: unlock write lock
}
private Set<Term> unflushedDeletions = new HashSet<Term>();
public void deleteDocuments(Term term) throws IOException {
unflushedDeletions.add(term);
}
public void deleteDocuments(Term[] terms) throws IOException {
for (Term term : terms) {
deleteDocuments(term);
}
}
public void updateDocument(Term term, Document doc) throws IOException {
updateDocument(term, doc, getAnalyzer());
}
public void updateDocument(Term term, Document doc, Analyzer analyzer) throws IOException {
deleteDocuments(term);
addDocument(doc, analyzer);
}
public int getMaxFieldLength() {
return maxFieldLength;
}
public void setMaxFieldLength(int maxFieldLength) {
this.maxFieldLength = maxFieldLength;
}
public Similarity getSimilarity() {
return similarity;
}
public void setSimilarity(Similarity similarity) {
this.similarity = similarity;
}
public Analyzer getAnalyzer() {
return analyzer;
}
private class FieldSetting {
private String fieldName;
private float boost = 1;
//private int dimensions = 0; // this is futuristic
private int position = 0;
private int offset;
private int fieldLength = 0;
private boolean storeTermVector = false;
private boolean storeOffsetWithTermVector = false;
private boolean storePositionWithTermVector = false;
private boolean omitNorms = false;
private boolean isTokenized = false;
private boolean isStored = false;
private boolean isIndexed = false;
private boolean isBinary = false;
private boolean isCompressed = false;
//private float norm;
//private byte encodedNorm;
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
final FieldSetting that = (FieldSetting) o;
return fieldName.equals(that.fieldName);
}
public int hashCode() {
return fieldName.hashCode();
}
}
private class TermDocumentInformationFactory {
private LinkedList<byte[]> payloads = new LinkedList<byte[]>();
private LinkedList<Integer> termPositions = new LinkedList<Integer>();
private LinkedList<TermVectorOffsetInfo> termOffsets = new LinkedList<TermVectorOffsetInfo>();
}
}
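An illustrative sketch of the writer's buffer-and-commit cycle (not part of the commit): addDocument tokenizes and buffers, commit() makes the changes visible to readers, close() commits, and reaching mergeFactor buffered documents triggers an automatic commit:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.instantiated.InstantiatedIndex;
import org.apache.lucene.store.instantiated.InstantiatedIndexWriter;

public class WriterExample {
  public static void main(String[] args) throws Exception {
    InstantiatedIndex index = new InstantiatedIndex();
    InstantiatedIndexWriter writer = index.indexWriterFactory(new StandardAnalyzer(), true);
    writer.setMergeFactor(2500); // buffered documents before an automatic commit

    Document doc = new Document();
    doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.add(new Field("body", "hello instantiated world", Field.Store.NO, Field.Index.TOKENIZED));
    writer.addDocument(doc); // tokenized and buffered, not yet searchable
    writer.commit();         // now visible to readers

    writer.updateDocument(new Term("id", "1"), doc); // buffered delete-by-term, then re-add
    writer.close();          // commits any remaining buffered changes
  }
}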

View File

@@ -0,0 +1,250 @@
package org.apache.lucene.store.instantiated;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.Term;
import java.io.Serializable;
import java.util.Comparator;
import java.util.Collections;
import java.util.Arrays;
/**
* A term in the inverted index, coupled to the documents it occurs in.
*
* @see org.apache.lucene.index.Term
*/
public class InstantiatedTerm
implements Serializable {
private static final long serialVersionUID = 1l;
public static final Comparator<InstantiatedTerm> comparator = new Comparator<InstantiatedTerm>() {
public int compare(InstantiatedTerm instantiatedTerm, InstantiatedTerm instantiatedTerm1) {
return instantiatedTerm.getTerm().compareTo(instantiatedTerm1.getTerm());
}
};
public static final Comparator termComparator = new Comparator() {
public int compare(Object o, Object o1) {
return ((InstantiatedTerm)o).getTerm().compareTo((Term)o1);
}
};
private Term term;
/**
* index of term in InstantiatedIndex
* @see org.apache.lucene.store.instantiated.InstantiatedIndex#getOrderedTerms() */
private int termIndex;
/**
* @return Term associated with this entry of the index object graph
*/
public Term getTerm() {
return term;
}
InstantiatedTerm(String field, String text) {
this.term = new Term(field, text);
}
// this could speed up TermDocs.skipTo even more
// private Map</** document number*/Integer, /** index in associatedDocuments */Integer> associatedDocumentIndexByDocumentNumber = new HashMap<Integer, Integer>();
//
// public Map</** document number*/Integer, /** index in associatedDocuments */Integer> getAssociatedDocumentIndexByDocumentNumber() {
// return associatedDocumentIndexByDocumentNumber;
// }
/** Ordered by document number */
private InstantiatedTermDocumentInformation[] associatedDocuments;
/**
* Meta data per document in which this term occurs.
* Ordered by document number.
*
* @return Meta data per document in which this term occurs.
*/
public InstantiatedTermDocumentInformation[] getAssociatedDocuments() {
return associatedDocuments;
}
/**
* Meta data per document in which this term occurs.
* Ordered by document number.
*
* @param associatedDocuments meta data per document in which this term occurs, ordered by document number
*/
void setAssociatedDocuments(InstantiatedTermDocumentInformation[] associatedDocuments) {
this.associatedDocuments = associatedDocuments;
}
/**
* Finds the index of the first entry whose document number is
* greater than or equal to <i>target</i>, or -1 if there is no such element.
*
* @param target the document number to match
* @return -1 if there is no such element
*/
public int seekCeilingDocumentInformationIndex(int target) {
return seekCeilingDocumentInformationIndex(target, 0, getAssociatedDocuments().length);
}
/**
* Finds the index of the first entry whose document number is
* greater than or equal to <i>target</i>, or -1 if there is no such element.
*
* @param target the document number to match
* @param startOffset associated documents index start offset
* @return -1 if there is no such element
*/
public int seekCeilingDocumentInformationIndex(int target, int startOffset) {
return seekCeilingDocumentInformationIndex(target, startOffset, getAssociatedDocuments().length);
}
/**
* Finds the index of the first entry whose document number is
* greater than or equal to <i>target</i>, or -1 if there is no such element.
*
* @param target the document number to match
* @param startOffset associated documents index start offset
* @param endPosition associated documents index end position
* @return -1 if there is no such element
*/
public int seekCeilingDocumentInformationIndex(int target, int startOffset, int endPosition) {
int pos = binarySearchAssociatedDocuments(target, startOffset, endPosition - startOffset);
// int pos = Arrays.binarySearch(getAssociatedDocuments(), target, InstantiatedTermDocumentInformation.documentNumberIntegerComparator);
if (pos < 0) {
pos = -1 - pos;
}
if (getAssociatedDocuments().length <= pos) {
return -1;
} else {
return pos;
}
}
public int binarySearchAssociatedDocuments(int target) {
return binarySearchAssociatedDocuments(target, 0);
}
public int binarySearchAssociatedDocuments(int target, int offset) {
return binarySearchAssociatedDocuments(target, offset, associatedDocuments.length - offset);
}
/**
* @param target value to search for in the array
* @param offset index of the first valid value in the array
* @param length number of valid values in the array
* @return index of an occurrence of <i>target</i> in the array, or -(insertionIndex + 1) if it is not contained in the array (<i>insertionIndex</i> is then the index at which <i>target</i> could be inserted).
*/
public int binarySearchAssociatedDocuments(int target, int offset, int length) {
// implementation originally from http://ochafik.free.fr/blog/?p=106
if (length == 0) {
return -1 - offset;
}
int min = offset, max = offset + length - 1;
int minVal = getAssociatedDocuments()[min].getDocument().getDocumentNumber();
int maxVal = getAssociatedDocuments()[max].getDocument().getDocumentNumber();
int nPreviousSteps = 0;
for (; ;) {
// be careful not to compute target - minVal, for there might be an integer overflow.
if (target <= minVal) return target == minVal ? min : -1 - min;
if (target >= maxVal) return target == maxVal ? max : -2 - max;
assert min != max;
int pivot;
// A typical binarySearch algorithm uses pivot = (min + max) / 2.
// The pivot we use here tries to be smarter, choosing a pivot close to the expected location of the target.
// This dramatically reduces the number of steps needed to reach the target.
// However, it does not work well with a logarithmic distribution of values, for instance.
// When the target is not found quickly the smart way, we switch to the standard pivot.
if (nPreviousSteps > 2) {
pivot = (min + max) >> 1;
// stop increasing nPreviousSteps from now on
} else {
// NOTE: We cannot do the following operations in int precision, because there might be overflows.
// long operations are slower than float operations with the hardware this was tested on (intel core duo 2, JVM 1.6.0).
// Overall, using float proved to be the safest and fastest approach.
pivot = min + (int) ((target - (float) minVal) / (maxVal - (float) minVal) * (max - min));
nPreviousSteps++;
}
int pivotVal = getAssociatedDocuments()[pivot].getDocument().getDocumentNumber();
// NOTE: do not compute target - pivotVal because of overflows
if (target > pivotVal) {
min = pivot + 1;
max--;
} else if (target == pivotVal) {
return pivot;
} else {
min++;
max = pivot - 1;
}
maxVal = getAssociatedDocuments()[max].getDocument().getDocumentNumber();
minVal = getAssociatedDocuments()[min].getDocument().getDocumentNumber();
}
}
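// An illustrative example of the return convention above (a sketch, not exercised by this class):
// given associated document numbers [2, 5, 9], searching for 5 returns index 1,
// while searching for 6 returns -(2 + 1) = -3; seekCeilingDocumentInformationIndex
// then recovers the insertion index as -1 - (-3) = 2.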
/**
* Navigates to the view of the occurrences of this term in a specific document.
*
* This method is only used by InstantiatedIndex(IndexReader) and
* should not be optimized for less CPU at the cost of more RAM.
*
* @param documentNumber the document number in the index
* @return view of this term from specified document
*/
public InstantiatedTermDocumentInformation getAssociatedDocument(int documentNumber) {
int pos = binarySearchAssociatedDocuments(documentNumber);
return pos < 0 ? null : getAssociatedDocuments()[pos];
}
public final String field() {
return term.field();
}
public final String text() {
return term.text();
}
public String toString() {
return term.toString();
}
public int getTermIndex() {
return termIndex;
}
public void setTermIndex(int termIndex) {
this.termIndex = termIndex;
}
}

View File

@ -0,0 +1,136 @@
package org.apache.lucene.store.instantiated;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
/**
* A {@link org.apache.lucene.index.TermDocs} navigating an {@link InstantiatedIndexReader}.
*/
public class InstantiatedTermDocs
implements TermDocs {
private final InstantiatedIndexReader reader;
public InstantiatedTermDocs(InstantiatedIndexReader reader) {
this.reader = reader;
}
private int currentDocumentIndex;
protected InstantiatedTermDocumentInformation currentDocumentInformation;
protected InstantiatedTerm currentTerm;
public void seek(Term term) {
currentTerm = reader.getIndex().findTerm(term);
currentDocumentIndex = -1;
}
public void seek(org.apache.lucene.index.TermEnum termEnum) {
seek(termEnum.term());
}
public int doc() {
return currentDocumentInformation.getDocument().getDocumentNumber();
}
public int freq() {
return currentDocumentInformation.getTermPositions().length;
}
public boolean next() {
if (currentTerm != null) {
currentDocumentIndex++;
if (currentDocumentIndex < currentTerm.getAssociatedDocuments().length) {
currentDocumentInformation = currentTerm.getAssociatedDocuments()[currentDocumentIndex];
if (reader.hasDeletions() && reader.isDeleted(currentDocumentInformation.getDocument().getDocumentNumber())) {
return next();
} else {
return true;
}
}
}
return false;
}
public int read(int[] docs, int[] freqs) {
int i;
for (i = 0; i < docs.length; i++) {
if (!next()) {
break;
}
docs[i] = doc();
freqs[i] = freq();
}
return i;
}
/**
* Skips entries to the first beyond the current whose document number is
* greater than or equal to <i>target</i>. <p>Returns true if there is such
* an entry. <p>Behaves as if written: <pre>
* boolean skipTo(int target) {
* do {
* if (!next())
* return false;
* } while (target > doc());
* return true;
* }
* </pre>
* This implementation is considerably more efficient than that.
*
*/
public boolean skipTo(int target) {
if (currentTerm == null) {
return false;
}
if (currentDocumentIndex >= target) {
return next();
}
int startOffset = currentDocumentIndex >= 0 ? currentDocumentIndex : 0;
int pos = currentTerm.seekCeilingDocumentInformationIndex(target, startOffset);
if (pos == -1) {
return false;
}
currentDocumentInformation = currentTerm.getAssociatedDocuments()[pos];
currentDocumentIndex = pos;
if (reader.hasDeletions() && reader.isDeleted(currentDocumentInformation.getDocument().getDocumentNumber())) {
return next();
} else {
return true;
}
}
/**
* Does nothing
*/
public void close() {
}
}

View File

@ -0,0 +1,139 @@
package org.apache.lucene.store.instantiated;
import org.apache.lucene.index.TermVectorOffsetInfo;
import java.io.Serializable;
import java.util.Comparator;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* There is one instance of this class per indexed term in a document
* and it contains the meta data about each occurrence of a term in a document.
*
* It is the inner glue of the inverted index.
*
* <pre>
* [Term]-- {0..*} | {0..*} --(field)[Document]
* &lt;&lt;ordered>>
* |
* [TermDocumentInformation]
* +payloads
* +termPositions
* +termOffsets
* </pre>
*
*/
public class InstantiatedTermDocumentInformation
implements Serializable {
private static final long serialVersionUID = 1L;
public static final Comparator<InstantiatedTermDocumentInformation> termComparator = new Comparator<InstantiatedTermDocumentInformation>() {
public int compare(InstantiatedTermDocumentInformation instantiatedTermDocumentInformation, InstantiatedTermDocumentInformation instantiatedTermDocumentInformation1) {
return instantiatedTermDocumentInformation.getTerm().getTerm().compareTo(instantiatedTermDocumentInformation1.getTerm().getTerm());
}
};
public static final Comparator<InstantiatedTermDocumentInformation> documentNumberComparator = new Comparator<InstantiatedTermDocumentInformation>() {
public int compare(InstantiatedTermDocumentInformation instantiatedTermDocumentInformation, InstantiatedTermDocumentInformation instantiatedTermDocumentInformation1) {
return instantiatedTermDocumentInformation.getDocument().getDocumentNumber().compareTo(instantiatedTermDocumentInformation1.getDocument().getDocumentNumber());
}
};
public static final Comparator documentNumberIntegerComparator = new Comparator() {
public int compare(Object o1, Object o2) {
InstantiatedTermDocumentInformation di = (InstantiatedTermDocumentInformation) o1;
Integer i = (Integer) o2;
return di.getDocument().getDocumentNumber().compareTo(i);
}
};
private byte[][] payloads;
private int[] termPositions;
private InstantiatedTerm term;
private InstantiatedDocument document;
private TermVectorOffsetInfo[] termOffsets;
public InstantiatedTermDocumentInformation(InstantiatedTerm term, InstantiatedDocument document, int[] termPositions, byte[][] payloads) {
this.term = term;
this.document = document;
this.termPositions = termPositions;
this.payloads = payloads;
}
// not quite sure why I wanted this.
// /**
// * [Term]--- {0..* ordered} ->[Info]
// */
// private int indexFromTerm;
// public int getIndexFromTerm() {
// return indexFromTerm;
// }
//
// void setIndexFromTerm(int indexFromTerm) {
// this.indexFromTerm = indexFromTerm;
// }
public int[] getTermPositions() {
return termPositions;
}
public byte[][] getPayloads() {
return payloads;
}
public InstantiatedDocument getDocument() {
return document;
}
public InstantiatedTerm getTerm() {
return term;
}
void setTermPositions(int[] termPositions) {
this.termPositions = termPositions;
}
void setTerm(InstantiatedTerm term) {
this.term = term;
}
void setDocument(InstantiatedDocument document) {
this.document = document;
}
public TermVectorOffsetInfo[] getTermOffsets() {
return termOffsets;
}
void setTermOffsets(TermVectorOffsetInfo[] termOffsets) {
this.termOffsets = termOffsets;
}
}

View File

@ -0,0 +1,109 @@
package org.apache.lucene.store.instantiated;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import java.io.IOException;
import java.util.Arrays;
/**
* A {@link org.apache.lucene.index.TermEnum} navigating an {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader}.
*/
public class InstantiatedTermEnum
extends TermEnum {
private final InstantiatedIndexReader reader;
public InstantiatedTermEnum(InstantiatedIndexReader reader) {
this.nextTermIndex = 0;
this.reader = reader;
}
public InstantiatedTermEnum(InstantiatedIndexReader reader, int startPosition) {
this.reader = reader;
this.nextTermIndex = startPosition;
next();
}
private int nextTermIndex;
private InstantiatedTerm term;
/**
* Increments the enumeration to the next element. True if one exists.
*/
public boolean next() {
if (reader.getIndex().getOrderedTerms().length <= nextTermIndex) {
return false;
} else {
term = reader.getIndex().getOrderedTerms()[nextTermIndex];
nextTermIndex++;
return true;
}
}
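// An illustrative enumeration loop (a sketch, assuming an open InstantiatedIndexReader):
//
//   InstantiatedTermEnum termEnum = new InstantiatedTermEnum(reader);
//   while (termEnum.next()) {
//     Term term = termEnum.term();
//     int docFreq = termEnum.docFreq();
//   }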
/**
* Returns the current Term in the enumeration.
*/
public Term term() {
return term == null ? null : term.getTerm();
}
/**
* Returns the docFreq of the current Term in the enumeration.
*/
public int docFreq() {
return term.getAssociatedDocuments().length;
}
/**
* Closes the enumeration to further activity, freeing resources.
*/
public void close() {
}
public boolean skipTo(Term target) throws IOException {
// this method is not known to be used by anything
// in lucene for many years now, so there is
// very little to gain by optimizing it further.
InstantiatedTerm term = reader.getIndex().findTerm(target);
if (term != null) {
this.term = term;
nextTermIndex = term.getTermIndex() + 1;
return true;
} else {
int pos = Arrays.binarySearch(reader.getIndex().getOrderedTerms(), target, InstantiatedTerm.termComparator);
if (pos < 0) {
pos = -1 - pos;
}
if (pos >= reader.getIndex().getOrderedTerms().length) {
return false;
}
this.term = reader.getIndex().getOrderedTerms()[pos];
nextTermIndex = pos + 1;
return true;
}
}
}

View File

@ -0,0 +1,112 @@
package org.apache.lucene.store.instantiated;
import org.apache.lucene.index.TermFreqVector;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Vector space view of a document in an {@link InstantiatedIndexReader}.
*
* @see org.apache.lucene.index.TermFreqVector
*/
public class InstantiatedTermFreqVector
implements TermFreqVector, Serializable {
private static final long serialVersionUID = 1L;
private final List<InstantiatedTermDocumentInformation> termDocumentInformations;
private final String field;
private final String terms[];
private final int termFrequencies[];
public InstantiatedTermFreqVector(InstantiatedDocument document, String field) {
this.field = field;
termDocumentInformations = document.getVectorSpace().get(field);
terms = new String[termDocumentInformations.size()];
termFrequencies = new int[termDocumentInformations.size()];
for (int i = 0; i < termDocumentInformations.size(); i++) {
InstantiatedTermDocumentInformation termDocumentInformation = termDocumentInformations.get(i);
terms[i] = termDocumentInformation.getTerm().text();
termFrequencies[i] = termDocumentInformation.getTermPositions().length;
}
}
/**
* @return The name of the field this vector is associated with
*/
public String getField() {
return field;
}
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append('{');
sb.append(field).append(": ");
if (terms != null) {
for (int i = 0; i < terms.length; i++) {
if (i > 0) sb.append(", ");
sb.append(terms[i]).append('/').append(termFrequencies[i]);
}
}
sb.append('}');
return sb.toString();
}
public int size() {
return terms == null ? 0 : terms.length;
}
public String[] getTerms() {
return terms;
}
public int[] getTermFrequencies() {
return termFrequencies;
}
public int indexOf(String termText) {
if (terms == null)
return -1;
int res = Arrays.binarySearch(terms, termText);
return res >= 0 ? res : -1;
}
public int[] indexesOf(String[] termNumbers, int start, int len) {
// TODO: there must be a more efficient way of doing this.
// At least, we could advance the lower bound of the terms array
// as we find valid indices. Also, it might be possible to leverage
// this even more by starting in the middle of the termNumbers array
// and thus dividing the terms array maybe in half with each found index.
int res[] = new int[len];
for (int i = 0; i < len; i++) {
res[i] = indexOf(termNumbers[start + i]);
}
return res;
}
public List<InstantiatedTermDocumentInformation> getTermDocumentInformations() {
return termDocumentInformations;
}
}

View File

@ -0,0 +1,47 @@
package org.apache.lucene.store.instantiated;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermVectorOffsetInfo;
import java.io.Serializable;
/**
* Extended vector space view of a document in an {@link InstantiatedIndexReader}.
*
* @see org.apache.lucene.index.TermPositionVector
*/
public class InstantiatedTermPositionVector
extends InstantiatedTermFreqVector
implements TermPositionVector, Serializable {
private static final long serialVersionUID = 1L;
public InstantiatedTermPositionVector(InstantiatedDocument document, String field) {
super(document, field);
}
public int[] getTermPositions(int index) {
return getTermDocumentInformations().get(index).getTermPositions();
}
public TermVectorOffsetInfo[] getOffsets(int index) {
return getTermDocumentInformations().get(index).getTermOffsets();
}
}

View File

@ -0,0 +1,100 @@
package org.apache.lucene.store.instantiated;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.TermPositions;
import java.io.IOException;
/**
* A {@link org.apache.lucene.index.TermPositions} navigating an {@link InstantiatedIndexReader}.
*/
public class InstantiatedTermPositions
extends InstantiatedTermDocs
implements TermPositions {
public int getPayloadLength() {
return currentDocumentInformation.getPayloads()[currentTermPositionIndex].length;
}
public byte[] getPayload(byte[] data, int offset) throws IOException {
byte[] payloads = currentDocumentInformation.getPayloads()[currentTermPositionIndex];
if (data == null || data.length - offset < getPayloadLength()) {
// the given array is null or too small to hold the payload data,
// so return the internal array instead of copying
return payloads;
} else {
System.arraycopy(payloads, 0, data, offset, payloads.length);
return data;
}
}
public boolean isPayloadAvailable() {
return currentDocumentInformation.getPayloads()[currentTermPositionIndex] != null;
}
public InstantiatedTermPositions(InstantiatedIndexReader reader) {
super(reader);
}
/**
* Returns next position in the current document. It is an error to call
* this more than {@link #freq()} times
* without calling {@link #next()}<p> This is
* invalid until {@link #next()} is called for
* the first time.
*/
public int nextPosition() {
currentTermPositionIndex++;
// if you get an ArrayIndexOutOfBoundsException here,
// it might be because currentDocumentInformation.getIndexFromTerm has not been set!
return currentDocumentInformation.getTermPositions()[currentTermPositionIndex];
}
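// An illustrative caller honoring the contract above (a sketch, not part of this class):
//
//   TermPositions termPositions = reader.termPositions(term);
//   while (termPositions.next()) {
//     for (int i = 0; i < termPositions.freq(); i++) {
//       int position = termPositions.nextPosition();
//     }
//   }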
private int currentTermPositionIndex;
/**
* Moves to the next pair in the enumeration.
* <p> Returns true if there is such a next pair in the enumeration.
*/
@Override
public boolean next() {
currentTermPositionIndex = -1;
return super.next();
}
/**
* Skips entries to the first beyond the current whose document number is
* greater than or equal to <i>target</i>. <p>Returns true iff there is such
* an entry. <p>Behaves as if written: <pre>
* boolean skipTo(int target) {
* do {
* if (!next())
* return false;
* } while (target > doc());
* return true;
* }
* </pre>
* Some implementations are considerably more efficient than that.
*/
@Override
public boolean skipTo(int target) {
currentTermPositionIndex = -1;
return super.skipTo(target);
}
}

View File

@ -0,0 +1,90 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<!--**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
-->
<head>
<title>InstantiatedIndex</title>
</head>
<body>
<p>WARNING: This contrib is experimental and the APIs may change without warning.</p>
<h2>Abstract</h2>
<p>
Represented as a coupled graph of class instances, this
all-in-memory index store implementation delivers search
results up to 100 times faster than the file-centric RAMDirectory,
at the cost of greater RAM consumption.
</p>
<h2>API</h2>
<p>
Like the default store implementation, InstantiatedIndex
comes with an IndexReader and an IndexWriter. The latter shares
many method signatures with the file-centric IndexWriter.
</p>
<p>
It is also possible to load the content of another index
by passing an IndexReader to the InstantiatedIndex constructor.
</p>
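<p>
A minimal usage sketch, mirroring the factory methods exercised by this contrib's
tests; the analyzer, field settings, and the <code>directory</code> variable are illustrative:
</p>
<pre>
InstantiatedIndex index = new InstantiatedIndex();

// write documents using the in-memory writer
InstantiatedIndexWriter writer = index.indexWriterFactory(new StandardAnalyzer(), true);
Document document = new Document();
document.add(new Field("title", "All work and no play", Field.Store.YES, Field.Index.TOKENIZED));
writer.addDocument(document);
writer.close();

// navigate the index using the in-memory reader
IndexReader reader = index.indexReaderFactory();

// or load the contents of an existing index
InstantiatedIndex loaded = new InstantiatedIndex(IndexReader.open(directory));
</pre>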
<h2>Performance</h2>
<p>
At a few thousand documents of ~160 characters each,
InstantiatedIndex outperforms RAMDirectory by some 50x;
by 15x at 100 documents of 2,000 characters each;
and it performs on a par with RAMDirectory at 10,000 documents of 2,000 characters each.
</p>
<p>Mileage may vary depending on term saturation.</p>
<p>
Populated with a single document InstantiatedIndex is almost, but not quite, as fast as MemoryIndex.
</p>
<p>
It takes more or less the same time to populate an InstantiatedIndex
as it takes to populate a RAMDirectory. Hardly any effort has been put
into optimizing the InstantiatedIndexWriter; only minimizing the amount
of time the index needs to be write-locked has been considered.
</p>
<h2>Caveats</h2>
<ul>
<li>No locks! Consider using InstantiatedIndex as if it was immutable.</li>
<li>No documents with fields containing readers!</li>
<li>Only FieldOption.ALL is allowed by IndexReader#getFieldNames(FieldOption).</li>
<li>No field selection when retrieving documents, as all stored fields are available in memory.</li>
</ul>
<h2>Use cases</h2>
<p>
Could replace any small index that would benefit from faster response times:
an a priori spell check index,
an index of new documents exposed to user search agent queries,
compiling classifiers in machine learning environments, etc.
<h2>Class diagram</h2>
<a href="../../../../../../../docs/classdiagram.png"><img width="640px" height="480px" src="../../../../../../../docs/classdiagram.png" alt="class diagram"></a>
<br/>
<a href="../../../../../../../docs/classdiagram.uxf">Diagram</a> rendered using <a href="http://umlet.com">UMLet</a> 7.1.
</body>
</html>

View File

@ -0,0 +1,424 @@
package org.apache.lucene.store.instantiated;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import junit.framework.TestCase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
import java.util.*;
/**
* Asserts equality of content and behaviour of two index readers.
*/
public class TestIndicesEquals extends TestCase {
// public void test2() throws Exception {
// FSDirectory fsdir = FSDirectory.getDirectory("/tmp/fatcorpus");
// IndexReader ir = IndexReader.open(fsdir);
// InstantiatedIndex ii = new InstantiatedIndex(ir);
// ir.close();
// testEquals(fsdir, ii);
// }
public void testLoadIndexReader() throws Exception {
RAMDirectory dir = new RAMDirectory();
// create dir data
IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(), true);
for (int i = 0; i < 5; i++) {
Document document = new Document();
assembleDocument(document, i);
indexWriter.addDocument(document);
}
indexWriter.close();
// test load ii from index reader
IndexReader ir = IndexReader.open(dir);
InstantiatedIndex ii = new InstantiatedIndex(ir);
ir.close();
testEquals(dir, ii);
}
public void testInstantiatedIndexWriter() throws Exception {
RAMDirectory dir = new RAMDirectory();
InstantiatedIndex ii = new InstantiatedIndex();
// create dir data
IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(), true);
for (int i = 0; i < 500; i++) {
Document document = new Document();
assembleDocument(document, i);
indexWriter.addDocument(document);
}
indexWriter.close();
// test ii writer
InstantiatedIndexWriter instantiatedIndexWriter = ii.indexWriterFactory(new StandardAnalyzer(), true);
for (int i = 0; i < 500; i++) {
Document document = new Document();
assembleDocument(document, i);
instantiatedIndexWriter.addDocument(document);
}
instantiatedIndexWriter.close();
testEquals(dir, ii);
testTermDocs(dir, ii);
}
private void testTermDocs(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception {
IndexReader aprioriReader = IndexReader.open(aprioriIndex);
IndexReader testReader = testIndex.indexReaderFactory();
TermEnum aprioriTermEnum = aprioriReader.terms(new Term("c", "danny"));
TermDocs aprioriTermDocs = aprioriReader.termDocs(aprioriTermEnum.term());
TermDocs testTermDocs = testReader.termDocs(aprioriTermEnum.term());
assertEquals(aprioriTermDocs.next(), testTermDocs.next());
assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
assertEquals(aprioriTermDocs.skipTo(100), testTermDocs.skipTo(100));
assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
assertEquals(aprioriTermDocs.next(), testTermDocs.next());
assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
assertEquals(aprioriTermDocs.next(), testTermDocs.next());
assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
assertEquals(aprioriTermDocs.skipTo(110), testTermDocs.skipTo(110));
assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
assertEquals(aprioriTermDocs.skipTo(10), testTermDocs.skipTo(10));
assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
assertEquals(aprioriTermDocs.skipTo(210), testTermDocs.skipTo(210));
assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
aprioriTermDocs.close();
aprioriReader.close();
testTermDocs.close();
testReader.close();
}
private void assembleDocument(Document document, int i) {
document.add(new Field("a", i + " Do you really want to go and live in that house all winter?", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
if (i > 0) {
document.add(new Field("b0", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
document.add(new Field("b1", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NO_NORMS, Field.TermVector.NO));
document.add(new Field("b2", i + " All work and no play makes Jack a dull boy", Field.Store.NO, Field.Index.UN_TOKENIZED, Field.TermVector.NO));
document.add(new Field("b3", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
if (i > 1) {
document.add(new Field("c", i + " Redrum redrum", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
if (i > 2) {
document.add(new Field("d", i + " Hello Danny, come and play with us... forever and ever. and ever.", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
if (i > 3) {
Field f = new Field("e", i + " Heres Johnny!", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
f.setOmitNorms(true);
document.add(f);
if (i > 4) {
final List<Token> tokens = new ArrayList<Token>(2);
Token t = new Token("the", 0, 2, "text");
t.setPayload(new Payload(new byte[]{1, 2, 3}));
tokens.add(t);
t = new Token("end", 3, 5, "text");
t.setPayload(new Payload(new byte[]{2}));
tokens.add(t);
tokens.add(new Token("fin", 7, 9));
document.add(new Field("f", new TokenStream() {
Iterator<Token> it = tokens.iterator();
public Token next() throws IOException {
if (!it.hasNext()) {
return null;
}
return it.next();
}
public void reset() throws IOException {
it = tokens.iterator();
}
}));
}
}
}
}
}
}
/**
* Asserts that the content of two index readers equal each other.
*
* @param aprioriIndex the index that is known to be correct
* @param testIndex the index that is supposed to equals the apriori index.
* @throws Exception
*/
protected void testEquals(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception {
IndexReader aprioriReader = IndexReader.open(aprioriIndex);
IndexReader testReader = testIndex.indexReaderFactory();
assertEquals(aprioriReader.numDocs(), testReader.numDocs());
for (Object field : aprioriReader.getFieldNames(IndexReader.FieldOption.ALL)) {
// test norms as used by normal use
byte[] aprioriNorms = aprioriReader.norms((String) field);
byte[] testNorms = testReader.norms((String) field);
assertEquals(aprioriNorms.length, testNorms.length);
for (int i = 0; i < aprioriNorms.length; i++) {
assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
}
// test norms as used by multireader
aprioriNorms = new byte[aprioriReader.maxDoc()];
aprioriReader.norms((String) field, aprioriNorms, 0);
testNorms = new byte[testReader.maxDoc()];
testReader.norms((String) field, testNorms, 0);
assertEquals(aprioriNorms.length, testNorms.length);
for (int i = 0; i < aprioriNorms.length; i++) {
assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
}
}
for (int docIndex = 0; docIndex < aprioriReader.numDocs(); docIndex++) {
assertEquals(aprioriReader.isDeleted(docIndex), testReader.isDeleted(docIndex));
}
// compare term enumeration stepping
TermEnum aprioriTermEnum = aprioriReader.terms();
TermEnum testTermEnum = testReader.terms();
while (true) {
if (!aprioriTermEnum.next()) {
assertFalse(testTermEnum.next());
break;
}
assertTrue(testTermEnum.next());
assertEquals(aprioriTermEnum.term(), testTermEnum.term());
assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq());
// compare termDocs seeking
TermDocs aprioriTermDocsSeeker = aprioriReader.termDocs(aprioriTermEnum.term());
TermDocs testTermDocsSeeker = testReader.termDocs(testTermEnum.term());
while (aprioriTermDocsSeeker.next()) {
assertTrue(testTermDocsSeeker.skipTo(aprioriTermDocsSeeker.doc()));
assertEquals(aprioriTermDocsSeeker.doc(), testTermDocsSeeker.doc());
}
aprioriTermDocsSeeker.close();
testTermDocsSeeker.close();
// compare documents per term
assertEquals(aprioriReader.docFreq(aprioriTermEnum.term()), testReader.docFreq(testTermEnum.term()));
TermDocs aprioriTermDocs = aprioriReader.termDocs(aprioriTermEnum.term());
TermDocs testTermDocs = testReader.termDocs(testTermEnum.term());
while (true) {
if (!aprioriTermDocs.next()) {
assertFalse(testTermDocs.next());
break;
}
assertTrue(testTermDocs.next());
assertEquals(aprioriTermDocs.doc(), testTermDocs.doc());
assertEquals(aprioriTermDocs.freq(), testTermDocs.freq());
}
aprioriTermDocs.close();
testTermDocs.close();
// compare term positions
TermPositions testTermPositions = testReader.termPositions(testTermEnum.term());
TermPositions aprioriTermPositions = aprioriReader.termPositions(aprioriTermEnum.term());
if (aprioriTermPositions != null) {
for (int docIndex = 0; docIndex < aprioriReader.maxDoc(); docIndex++) {
boolean hasNext = aprioriTermPositions.next();
if (hasNext) {
assertTrue(testTermPositions.next());
assertEquals(aprioriTermPositions.freq(), testTermPositions.freq());
for (int termPositionIndex = 0; termPositionIndex < aprioriTermPositions.freq(); termPositionIndex++) {
int aprioriPos = aprioriTermPositions.nextPosition();
int testPos = testTermPositions.nextPosition();
assertEquals(aprioriPos, testPos);
assertEquals(aprioriTermPositions.isPayloadAvailable(), testTermPositions.isPayloadAvailable());
if (aprioriTermPositions.isPayloadAvailable()) {
assertEquals(aprioriTermPositions.getPayloadLength(), testTermPositions.getPayloadLength());
byte[] aprioriPayloads = aprioriTermPositions.getPayload(new byte[aprioriTermPositions.getPayloadLength()], 0);
byte[] testPayloads = testTermPositions.getPayload(new byte[testTermPositions.getPayloadLength()], 0);
for (int i = 0; i < aprioriPayloads.length; i++) {
assertEquals(aprioriPayloads[i], testPayloads[i]);
}
}
}
}
}
aprioriTermPositions.close();
testTermPositions.close();
}
}
// compare term enumeration seeking
aprioriTermEnum = aprioriReader.terms();
TermEnum aprioriTermEnumSeeker = aprioriReader.terms();
TermEnum testTermEnumSeeker = testReader.terms();
while (aprioriTermEnum.next()) {
if (aprioriTermEnumSeeker.skipTo(aprioriTermEnum.term())) {
assertTrue(testTermEnumSeeker.skipTo(aprioriTermEnum.term()));
assertEquals(aprioriTermEnumSeeker.term(), testTermEnumSeeker.term());
} else {
assertFalse(testTermEnumSeeker.skipTo(aprioriTermEnum.term()));
}
}
aprioriTermEnum.close();
aprioriTermEnumSeeker.close();
testTermEnumSeeker.close();
// skip to non existing terms
aprioriTermEnumSeeker = aprioriReader.terms();
testTermEnumSeeker = testReader.terms();
aprioriTermEnum = aprioriReader.terms();
aprioriTermEnum.next();
Term nonExistingTerm = new Term(aprioriTermEnum.term().field(), "bzzzzoo993djdj380sdf");
aprioriTermEnum.close();
assertEquals(aprioriTermEnumSeeker.skipTo(nonExistingTerm), testTermEnumSeeker.skipTo(nonExistingTerm));
assertEquals(aprioriTermEnumSeeker.term(), testTermEnumSeeker.term());
aprioriTermEnumSeeker.close();
testTermEnumSeeker.close();
// compare term vectors and position vectors
for (int documentNumber = 0; documentNumber < aprioriReader.numDocs(); documentNumber++) {
if (documentNumber > 0) {
assertNotNull(aprioriReader.getTermFreqVector(documentNumber, "b0"));
assertNull(aprioriReader.getTermFreqVector(documentNumber, "b1"));
assertNotNull(testReader.getTermFreqVector(documentNumber, "b0"));
assertNull(testReader.getTermFreqVector(documentNumber, "b1"));
}
TermFreqVector[] aprioriFreqVectors = aprioriReader.getTermFreqVectors(documentNumber);
TermFreqVector[] testFreqVectors = testReader.getTermFreqVectors(documentNumber);
if (aprioriFreqVectors != null && testFreqVectors != null) {
Arrays.sort(aprioriFreqVectors, new Comparator<TermFreqVector>() {
public int compare(TermFreqVector termFreqVector, TermFreqVector termFreqVector1) {
return termFreqVector.getField().compareTo(termFreqVector1.getField());
}
});
Arrays.sort(testFreqVectors, new Comparator<TermFreqVector>() {
public int compare(TermFreqVector termFreqVector, TermFreqVector termFreqVector1) {
return termFreqVector.getField().compareTo(termFreqVector1.getField());
}
});
assertEquals("document " + documentNumber + " vectors does not match", aprioriFreqVectors.length, testFreqVectors.length);
for (int freqVectorIndex = 0; freqVectorIndex < aprioriFreqVectors.length; freqVectorIndex++) {
assertTrue(Arrays.equals(aprioriFreqVectors[freqVectorIndex].getTermFrequencies(), testFreqVectors[freqVectorIndex].getTermFrequencies()));
assertTrue(Arrays.equals(aprioriFreqVectors[freqVectorIndex].getTerms(), testFreqVectors[freqVectorIndex].getTerms()));
if (aprioriFreqVectors[freqVectorIndex] instanceof TermPositionVector) {
TermPositionVector aprioriTermPositionVector = (TermPositionVector) aprioriFreqVectors[freqVectorIndex];
TermPositionVector testTermPositionVector = (TermPositionVector) testFreqVectors[freqVectorIndex];
for (int positionVectorIndex = 0; positionVectorIndex < aprioriFreqVectors[freqVectorIndex].getTerms().length; positionVectorIndex++)
{
if (aprioriTermPositionVector.getOffsets(positionVectorIndex) != null) {
assertTrue(Arrays.equals(aprioriTermPositionVector.getOffsets(positionVectorIndex), testTermPositionVector.getOffsets(positionVectorIndex)));
}
if (aprioriTermPositionVector.getTermPositions(positionVectorIndex) != null) {
assertTrue(Arrays.equals(aprioriTermPositionVector.getTermPositions(positionVectorIndex), testTermPositionVector.getTermPositions(positionVectorIndex)));
}
}
}
}
}
}
aprioriTermEnum.close();
testTermEnum.close();
aprioriReader.close();
testReader.close();
}
}