From b8fc54e72aa18176763648f6021e7f1d184defa1 Mon Sep 17 00:00:00 2001
From: Karl-Johan Wettin <kalle@apache.org>
Date: Sat, 28 Jun 2008 17:23:35 +0000
Subject: [PATCH] LUCENE-1312: Added full support for
 InstantiatedIndexReader#getFieldNames() and extended the test case to assert
 deleted documents behaves as they should (they did).

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@672556 13f79535-47bb-0310-9956-ffa450edef68
---
 contrib/instantiated/CHANGES.txt              |  33 ++++
 .../store/instantiated/FieldSetting.java      |  61 ++++++
 .../store/instantiated/FieldSettings.java     |  95 ++++++++++
 .../store/instantiated/InstantiatedIndex.java |  90 ++++++++-
 .../instantiated/InstantiatedIndexReader.java | 176 ++++++++++++------
 .../instantiated/InstantiatedIndexWriter.java | 165 ++++++++--------
 .../instantiated/InstantiatedTermDocs.java    |   5 -
 .../instantiated/InstantiatedTermEnum.java    |   2 +-
 .../lucene/store/instantiated/package.html    |   5 +-
 .../store/instantiated/TestIndicesEquals.java |  37 +++-
 10 files changed, 512 insertions(+), 157 deletions(-)
 create mode 100644 contrib/instantiated/CHANGES.txt
 create mode 100644 contrib/instantiated/src/java/org/apache/lucene/store/instantiated/FieldSetting.java
 create mode 100644 contrib/instantiated/src/java/org/apache/lucene/store/instantiated/FieldSettings.java

diff --git a/contrib/instantiated/CHANGES.txt b/contrib/instantiated/CHANGES.txt
new file mode 100644
index 00000000000..3d777fe843a
--- /dev/null
+++ b/contrib/instantiated/CHANGES.txt
@@ -0,0 +1,33 @@
+Lucene InstantiatedIndex contrib module change Log
+
+======================= Trunk (not yet released) =======================
+
+Changes in runtime behavior
+
+ (None)
+
+API Changes
+
+ (None)
+
+Bug fixes
+
+ 1. LUCENE-1312: Added full support for InstantiatedIndexReader#getFieldNames()
+    and tests that assert that deleted documents behave as they should (they did).
+    (Jason Rutherglen, Karl Wettin)
+
+New features
+
+ (None)
+
+Documentation
+
+ (None)
+
+Build
+
+ (None)
+
+Test Cases
+
+ (None)
\ No newline at end of file
diff --git a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/FieldSetting.java b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/FieldSetting.java
new file mode 100644
index 00000000000..34d96bf710c
--- /dev/null
+++ b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/FieldSetting.java
@@ -0,0 +1,61 @@
+package org.apache.lucene.store.instantiated;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * For non package access see {@link org.apache.lucene.index.IndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)} 
+ */
+class FieldSetting {
+  /** Name of the field these flags describe; stays null after the no-arg constructor until assigned. */
+  String fieldName;
+
+  boolean storeTermVector = false;
+  boolean storeOffsetWithTermVector = false;
+  boolean storePositionWithTermVector = false;
+  boolean storePayloads = false;
+
+  boolean stored = false;
+  boolean indexed = false;
+  boolean tokenized = false;
+  boolean compressed = false;
+
+  /** Creates a setting without a field name; all flags start out false. */
+  FieldSetting() {
+  }
+
+  FieldSetting(String fieldName) {
+    this.fieldName = fieldName;
+  }
+
+  /** Two settings are equal when they are of the same class and have the same (possibly null) field name. */
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    final FieldSetting that = (FieldSetting) o;
+    return fieldName == null ? that.fieldName == null : fieldName.equals(that.fieldName);
+  }
+
+  /** Hashes the field name only, matching equals; an unset name hashes to 0. */
+  public int hashCode() {
+    return fieldName == null ? 0 : fieldName.hashCode();
+  }
+}
diff --git a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/FieldSettings.java b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/FieldSettings.java
new file mode 100644
index 00000000000..99b4ace7ad6
--- /dev/null
+++ b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/FieldSettings.java
@@ -0,0 +1,95 @@
+package org.apache.lucene.store.instantiated;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Collection;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Essentially a Map<FieldName, {@link org.apache.lucene.store.instantiated.FieldSetting}> 
+ */
+class FieldSettings {
+
+  /** Settings keyed by field name. */
+  private Map</** field name */String, FieldSetting> fieldSettings = new HashMap<String, FieldSetting>();
+
+  FieldSettings() {
+  }
+
+  /**
+   * Folds the flags of the given setting into the one registered under the same field name, registering it first if unknown.
+   * Flags are only ever switched on here, never cleared. Returns the registered setting after the merge.
+   */
+  synchronized FieldSetting merge(FieldSetting fieldSetting) {
+    FieldSetting setting = fieldSettings.get(fieldSetting.fieldName);
+    if (setting == null) {
+      setting = new FieldSetting(fieldSetting.fieldName);
+      fieldSettings.put(fieldSetting.fieldName, setting);
+    }
+
+    if (fieldSetting.stored) {
+      setting.stored = true;
+    }
+    if (fieldSetting.compressed) {
+      setting.compressed = true;
+    }
+    if (fieldSetting.indexed) {
+      setting.indexed = true;
+    }
+    if (fieldSetting.tokenized) {
+      setting.tokenized = true;
+    }
+
+    if (fieldSetting.storeTermVector) {
+      setting.storeTermVector = true;
+    }
+    if (fieldSetting.storeOffsetWithTermVector) {
+      setting.storeOffsetWithTermVector = true;
+    }
+    if (fieldSetting.storePositionWithTermVector) {
+      setting.storePositionWithTermVector = true;
+    }
+
+    if (fieldSetting.storePayloads) {
+      setting.storePayloads = true;
+    }
+
+    return setting;
+  }
+
+  /** Returns the setting registered under the given field name, or null if there is none. */
+  FieldSetting get(String name) {
+    return fieldSettings.get(name);
+  }
+
+  /** Like {@link #get(String)}, but when nothing is registered and create is true, registers and returns a new setting. */
+  FieldSetting get(String name, boolean create) {
+    FieldSetting fieldSetting = fieldSettings.get(name);
+    if (create && fieldSetting == null) {
+      fieldSetting = new FieldSetting(name);
+      fieldSettings.put(name, fieldSetting);
+    }
+    return fieldSetting;
+  }
+
+  /** Returns the value collection of the backing map, i.e. all registered settings (not a copy). */
+  Collection<FieldSetting> values() {
+    return fieldSettings.values();
+  }
+}
diff --git a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
index 4a89dc0adca..dae534bfaf3 100644
--- a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
+++ b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
@@ -16,14 +16,24 @@ package org.apache.lucene.store.instantiated;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.index.*;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.*;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermPositionVector;
+import org.apache.lucene.index.TermPositions;
 
 /**
  * Represented as a coupled graph of class instances, this
@@ -49,7 +59,8 @@ public class InstantiatedIndex
   private long version = System.currentTimeMillis();
 
   private InstantiatedDocument[] documentsByNumber;
-  /** todo: this should be a BitSet */
+
+  /** todo: should this be a BitSet? */
   private Set<Integer> deletedDocuments;
 
   private Map<String, Map<String, InstantiatedTerm>> termsByFieldAndText;
@@ -57,6 +68,7 @@ public class InstantiatedIndex
 
   private Map<String, byte[]> normsByFieldNameAndDocumentNumber;
 
+  private FieldSettings fieldSettings;
 
   /**
    * Creates an empty instantiated index for you to fill with data using an {@link org.apache.lucene.store.instantiated.InstantiatedIndexWriter}. 
@@ -68,12 +80,14 @@ public class InstantiatedIndex
   void initialize() {
     // todo: clear index without loosing memory (uncouple stuff)
     termsByFieldAndText = new HashMap<String, Map<String, InstantiatedTerm>>();
+    fieldSettings = new FieldSettings();
     orderedTerms = new InstantiatedTerm[0];
     documentsByNumber = new InstantiatedDocument[0];
     normsByFieldNameAndDocumentNumber = new HashMap<String, byte[]>();
     deletedDocuments = new HashSet<Integer>();
   }
 
+  
   /**
    * Creates a new instantiated index that looks just like the index in a specific state as represented by a reader.
    *
@@ -83,7 +97,9 @@ public class InstantiatedIndex
   public InstantiatedIndex(IndexReader sourceIndexReader) throws IOException {
     this(sourceIndexReader, null);
   }
+  
 
+  
   /**
    * Creates a new instantiated index that looks just like the index in a specific state as represented by a reader.
    *
@@ -97,10 +113,63 @@ public class InstantiatedIndex
       throw new IOException("Source index is not optimized.");
     }
 
-    Collection<String> allFieldNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.ALL);
 
     initialize();
 
+    Collection<String> allFieldNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.ALL);
+        
+    // load field options
+
+    Collection<String> indexedNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED);
+    for (String name : indexedNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.indexed = true;
+    }
+    Collection<String> indexedNoVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR);
+    for (String name : indexedNoVecNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.storeTermVector = false;
+      setting.indexed = true;
+    }
+    Collection<String> indexedVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
+    for (String name : indexedVecNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.storeTermVector = true;
+      setting.indexed = true;
+    }
+    Collection<String> payloadNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS);
+    for (String name : payloadNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.storePayloads = true;
+    }
+    Collection<String> termVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR);
+    for (String name : termVecNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.storeTermVector = true;
+    }
+    Collection<String> termVecOffsetNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET);
+    for (String name : termVecOffsetNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.storeOffsetWithTermVector = true;
+    }
+    Collection<String> termVecPosNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION);
+    for (String name : termVecPosNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.storePositionWithTermVector = true;
+    }
+    Collection<String> termVecPosOffNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET);
+    for (String name : termVecPosOffNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.storeOffsetWithTermVector = true;
+      setting.storePositionWithTermVector = true;
+    }
+    Collection<String> unindexedNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.UNINDEXED);
+    for (String name : unindexedNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.indexed = false;
+    }
+
+
     documentsByNumber = new InstantiatedDocument[sourceIndexReader.numDocs()];
 
     // create documents
@@ -129,6 +198,8 @@ public class InstantiatedIndex
       }
     }
 
+
+
     // create norms
     for (String fieldName : allFieldNames) {
       if (fields == null || fields.contains(fieldName)) {
@@ -271,4 +342,9 @@ public class InstantiatedIndex
   void setVersion(long version) {
     this.version = version;
   }
+
+  /** Returns the field settings held by this index. */
+  FieldSettings getFieldSettings() {
+    return fieldSettings;
+  }
 }
diff --git a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
index 50f7924c0fa..ddeb9f43843 100644
--- a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
+++ b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
@@ -16,22 +16,37 @@ package org.apache.lucene.store.instantiated;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.index.*;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermFreqVector;
+import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.index.TermVectorMapper;
 import org.apache.lucene.store.Directory;
 
-import java.io.IOException;
-import java.util.*;
-
 /**
- * An InstantiatedIndexReader is not a snapshot in time,
- * it is completely in sync with the latest commit to the store!
- *
+ * An InstantiatedIndexReader is not a snapshot in time, it is completely in
+ * sync with the latest commit to the store!
+ * 
  * Consider using InstantiatedIndex as if it was immutable.
  */
-public class InstantiatedIndexReader
-    extends IndexReader {
+public class InstantiatedIndexReader extends IndexReader {
 
   private final InstantiatedIndex index;
 
@@ -47,36 +62,32 @@ public class InstantiatedIndexReader
     return true;
   }
 
-
   /**
-   * An InstantiatedIndexReader is not a snapshot in time,
-   * it is completely in sync with the latest commit to the store!
-   *  
+   * An InstantiatedIndexReader is not a snapshot in time, it is completely in
+   * sync with the latest commit to the store!
+   * 
    * @return output from {@link InstantiatedIndex#getVersion()} in associated instantiated index.
    */
   public long getVersion() {
     return index.getVersion();
   }
 
-
   public Directory directory() {
     throw new UnsupportedOperationException();
   }
 
-
   /**
    * An InstantiatedIndexReader is always current!
-   *
-   * Check whether this IndexReader is still using the
-   * current (i.e., most recently committed) version of the
-   * index.  If a writer has committed any changes to the
-   * index since this reader was opened, this will return
-   * <code>false</code>, in which case you must open a new
-   * IndexReader in order to see the changes.  See the
-   * description of the <a href="IndexWriter.html#autoCommit"><code>autoCommit</code></a>
-   * flag which controls when the {@link IndexWriter}
-   * actually commits changes to the index.
-   *
+   * 
+   * Check whether this IndexReader is still using the current (i.e., most
+   * recently committed) version of the index. If a writer has committed any
+   * changes to the index since this reader was opened, this will return
+   * <code>false</code>, in which case you must open a new IndexReader in
+   * order to see the changes. See the description of the <a
+   * href="IndexWriter.html#autoCommit"><code>autoCommit</code></a> flag
+   * which controls when the {@link IndexWriter} actually commits changes to the
+   * index.
+   * 
    * @return always true
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
@@ -92,7 +103,7 @@ public class InstantiatedIndexReader
 
   private Set<InstantiatedDocument> deletedDocuments = new HashSet<InstantiatedDocument>();
   private Set<Integer> deletedDocumentNumbers = new HashSet<Integer>();
-  private Map<String, List<NormUpdate>> updatedNormsByFieldNameAndDocumentNumber = null;
+  private Map<String,List<NormUpdate>> updatedNormsByFieldNameAndDocumentNumber = null;
 
   private class NormUpdate {
     private int doc;
@@ -140,7 +151,7 @@ public class InstantiatedIndexReader
 
     // 1. update norms
     if (updatedNormsByFieldNameAndDocumentNumber != null) {
-      for (Map.Entry<String, List<NormUpdate>> e : updatedNormsByFieldNameAndDocumentNumber.entrySet()) {
+      for (Map.Entry<String,List<NormUpdate>> e : updatedNormsByFieldNameAndDocumentNumber.entrySet()) {
         byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(e.getKey());
         for (NormUpdate normUpdate : e.getValue()) {
           norms[normUpdate.doc] = normUpdate.value;
@@ -168,27 +179,67 @@ public class InstantiatedIndexReader
 
   protected void doClose() throws IOException {
     // ignored
+    // todo perhaps release all associated instances?
   }
 
-  public Collection getFieldNames(FieldOption fldOption) {
-    if (fldOption != FieldOption.ALL) {
-      throw new IllegalArgumentException("Only FieldOption.ALL implemented."); // todo
+  public Collection getFieldNames(FieldOption fieldOption) {
+    Set<String> fieldSet = new HashSet<String>();
+    for (FieldSetting fi : index.getFieldSettings().values()) {
+      if (fieldOption == IndexReader.FieldOption.ALL) {
+        fieldSet.add(fi.fieldName);
+      } else if (!fi.indexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
+        fieldSet.add(fi.fieldName);
+      } else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
+        fieldSet.add(fi.fieldName);
+      } else if (fi.indexed && fieldOption == IndexReader.FieldOption.INDEXED) {
+        fieldSet.add(fi.fieldName);
+      } else if (fi.indexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
+        fieldSet.add(fi.fieldName);
+      } else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false
+          && fieldOption == IndexReader.FieldOption.TERMVECTOR) {
+        fieldSet.add(fi.fieldName);
+      } else if (fi.indexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
+        fieldSet.add(fi.fieldName);
+      } else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false
+          && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
+        fieldSet.add(fi.fieldName);
+      } else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false
+          && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
+        fieldSet.add(fi.fieldName);
+      } else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector)
+          && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
+        fieldSet.add(fi.fieldName);
+      } 
     }
-    return new ArrayList<String>(getIndex().getTermsByFieldAndText().keySet());
+    return fieldSet;
   }
 
-
   /**
-   * This implementation ignores the field selector! All fields are always returned
+   * Return the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup>
+   * position.
+   * <p>
+   * <b>Warning!</b>
+   * The resulting document is the actual stored document instance
+   * and not a deserialized clone as returned by an IndexReader
+   * over a {@link org.apache.lucene.store.Directory}.
+   * I.e., if you need to touch the document, clone it first!
+   * <p>
+   * This can also be seen as a feature for live changes of stored values,
+   * but be careful! Adding a field with a name unknown to the index
+   * or to a field with previously no stored values will make
+   * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
+   * out of sync, causing problems for instance when merging the
+   * instantiated index to another index.
+   * <p>
+   * This implementation ignores the field selector! All stored fields are always returned!
+   * <p>
    *
-   * Get the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup> position.
-   *
-   * @param n Get the document at the <code>n</code><sup>th</sup> position
+   * @param n document number
    * @param fieldSelector ignored
    * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
-   *
+   * 
    * @see org.apache.lucene.document.Fieldable
    * @see org.apache.lucene.document.FieldSelector
    * @see org.apache.lucene.document.SetBasedFieldSelector
@@ -198,19 +249,34 @@ public class InstantiatedIndexReader
     return document(n);
   }
 
+  /**
+   * Returns the stored fields of the <code>n</code><sup>th</sup>
+   * <code>Document</code> in this index.
+   * <p>
+   * <b>Warning!</b>
+   * The resulting document is the actual stored document instance
+   * and not a deserialized clone as returned by an IndexReader
+   * over a {@link org.apache.lucene.store.Directory}.
+   * I.e., if you need to touch the document, clone it first!
+   * <p>
+   * This can also be seen as a feature for live changes of stored values,
+   * but be careful! Adding a field with a name unknown to the index
+   * or to a field with previously no stored values will make
+   * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
+   * out of sync, causing problems for instance when merging the
+   * instantiated index to another index.
+   *
+   * @throws CorruptIndexException if the index is corrupt
+   * @throws IOException if there is a low-level IO error
+   */
+
   public Document document(int n) throws IOException {
-    if ((deletedDocumentNumbers != null
-        && deletedDocumentNumbers.contains(n))
-        ||
-        (getIndex().getDeletedDocuments() != null
-            && getIndex().getDeletedDocuments().contains(n))) {
-      return null;
-    }
-    return getIndex().getDocumentsByNumber()[n].getDocument();
+    return isDeleted(n) ? null : getIndex().getDocumentsByNumber()[n].getDocument();
   }
 
   /**
-   * never ever touch these values. it is the true values, unless norms have been touched.
+   * never ever touch these values. it is the true values, unless norms have
+   * been touched.
    */
   public byte[] norms(String field) throws IOException {
     byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
@@ -233,7 +299,7 @@ public class InstantiatedIndexReader
 
   protected void doSetNorm(int doc, String field, byte value) throws IOException {
     if (updatedNormsByFieldNameAndDocumentNumber == null) {
-      updatedNormsByFieldNameAndDocumentNumber = new HashMap<String, List<NormUpdate>>(getIndex().getNormsByFieldNameAndDocumentNumber().size());
+      updatedNormsByFieldNameAndDocumentNumber = new HashMap<String,List<NormUpdate>>(getIndex().getNormsByFieldNameAndDocumentNumber().size());
     }
     List<NormUpdate> list = updatedNormsByFieldNameAndDocumentNumber.get(field);
     if (list == null) {
@@ -252,7 +318,6 @@ public class InstantiatedIndexReader
     }
   }
 
-
   public TermEnum terms() throws IOException {
     return new InstantiatedTermEnum(this);
   }
@@ -260,11 +325,11 @@ public class InstantiatedIndexReader
   public TermEnum terms(Term t) throws IOException {
     InstantiatedTerm it = getIndex().findTerm(t);
     if (it != null) {
-      return new InstantiatedTermEnum(this, it.getTermIndex());      
+      return new InstantiatedTermEnum(this, it.getTermIndex());
     } else {
       int startPos = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator);
       if (startPos < 0) {
-        startPos = -1 -startPos;
+        startPos = -1 - startPos;
       }
       return new InstantiatedTermEnum(this, startPos);
     }
@@ -293,19 +358,16 @@ public class InstantiatedIndexReader
 
   public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
     InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
-    if (doc.getVectorSpace() == null
-        || doc.getVectorSpace().get(field) == null) {
+    if (doc.getVectorSpace() == null || doc.getVectorSpace().get(field) == null) {
       return null;
     } else {
       return new InstantiatedTermPositionVector(doc, field);
     }
   }
 
-
   public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
     InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
-    if (doc.getVectorSpace() != null
-        && doc.getVectorSpace().get(field) == null) {
+    if (doc.getVectorSpace() != null && doc.getVectorSpace().get(field) == null) {
       List<InstantiatedTermDocumentInformation> tv = doc.getVectorSpace().get(field);
       mapper.setExpectations(field, tv.size(), true, true);
       for (InstantiatedTermDocumentInformation tdi : tv) {
@@ -316,7 +378,7 @@ public class InstantiatedIndexReader
 
   public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
     InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
-    for (Map.Entry<String, List<InstantiatedTermDocumentInformation>> e : doc.getVectorSpace().entrySet()) {
+    for (Map.Entry<String,List<InstantiatedTermDocumentInformation>> e : doc.getVectorSpace().entrySet()) {
       mapper.setExpectations(e.getKey(), e.getValue().size(), true, true);
       for (InstantiatedTermDocumentInformation tdi : e.getValue()) {
         mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
diff --git a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
index 51088b4390b..d11b8a4c1c3 100644
--- a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
+++ b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
@@ -16,6 +16,22 @@ package org.apache.lucene.store.instantiated;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
@@ -28,11 +44,6 @@ import org.apache.lucene.index.TermVectorOffsetInfo;
 import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.Similarity;
 
-import java.io.IOException;
-import java.io.PrintStream;
-import java.io.StringReader;
-import java.util.*;
-
 /**
  * This class, similar to {@link org.apache.lucene.index.IndexWriter}, has no locking mechanism.
  * 
@@ -161,6 +172,11 @@ public class InstantiatedIndexWriter {
 
     boolean orderedTermsDirty = false;
     Set<InstantiatedTerm> dirtyTerms = new HashSet<InstantiatedTerm>(1000);
+    
+    Map<String, FieldSetting> fieldSettingsByFieldName = new HashMap<String, FieldSetting>();
+    for (String fieldName : fieldNameBuffer) {
+      fieldSettingsByFieldName.put(fieldName, new FieldSetting(fieldName));
+    }
 
     InstantiatedDocument[] documentsByNumber = new InstantiatedDocument[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()];
     System.arraycopy(index.getDocumentsByNumber(), 0, documentsByNumber, 0, index.getDocumentsByNumber().length);
@@ -215,7 +231,7 @@ public class InstantiatedIndexWriter {
         }
         termsInDocument += eFieldTermDocInfoFactoriesByTermText.getValue().size();
 
-        if (eFieldTermDocInfoFactoriesByTermText.getKey().isIndexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) {
+        if (eFieldTermDocInfoFactoriesByTermText.getKey().indexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) {
           float norm = eFieldTermDocInfoFactoriesByTermText.getKey().boost;
           norm *= document.getDocument().getBoost();
           norm *= similarity.lengthNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
@@ -340,6 +356,7 @@ public class InstantiatedIndexWriter {
         }
 
       }
+      fieldSettingsByFieldName.putAll(documentFieldSettingsByFieldName);
     }
 
     // order document informations in dirty terms
@@ -358,6 +375,9 @@ public class InstantiatedIndexWriter {
     index.setDocumentsByNumber(documentsByNumber);
     index.setOrderedTerms(orderedTerms.toArray(new InstantiatedTerm[orderedTerms.size()]));
 
+    for (FieldSetting fieldSetting : fieldSettingsByFieldName.values()) {
+      index.getFieldSettings().merge(fieldSetting);
+    }
     // set term index
     if (orderedTermsDirty) {
       // todo optimize, only update from start position
@@ -434,45 +454,46 @@ public class InstantiatedIndexWriter {
 
     Map<String /* field name */, FieldSetting> fieldSettingsByFieldName = new HashMap<String, FieldSetting>();
     for (Field field : (List<Field>) document.getDocument().getFields()) {
-      FieldSetting fieldSettings = fieldSettingsByFieldName.get(field.name());
-      if (fieldSettings == null) {
-        fieldSettings = new FieldSetting();
-        fieldSettings.fieldName = field.name().intern();
-        fieldSettingsByFieldName.put(fieldSettings.fieldName, fieldSettings);
-        fieldNameBuffer.add(fieldSettings.fieldName);
+      FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());
+      if (fieldSetting == null) {
+        fieldSetting = new FieldSetting();
+        fieldSetting.fieldName = field.name().intern();
+        fieldSettingsByFieldName.put(fieldSetting.fieldName, fieldSetting);
+        fieldNameBuffer.add(fieldSetting.fieldName);
       }
 
       // todo: fixme: multiple fields with the same name does not mean field boost += more boost.
-      fieldSettings.boost *= field.getBoost();
+      fieldSetting.boost *= field.getBoost();
       //fieldSettings.dimensions++;
 
+
       // once fieldSettings, always fieldSettings.
-      if (field.getOmitNorms() != fieldSettings.omitNorms) {
-        fieldSettings.omitNorms = true;
+      if (field.getOmitNorms()) {
+        fieldSetting.omitNorms = true;
       }
-      if (field.isIndexed() != fieldSettings.isIndexed) {
-        fieldSettings.isIndexed = true;
+      if (field.isIndexed() ) {
+        fieldSetting.indexed = true;
       }
-      if (field.isTokenized() != fieldSettings.isTokenized) {
-        fieldSettings.isTokenized = true;
+      if (field.isTokenized()) {
+        fieldSetting.tokenized = true;
       }
-      if (field.isCompressed() != fieldSettings.isCompressed) {
-        fieldSettings.isCompressed = true;
+      if (field.isCompressed()) {
+        fieldSetting.compressed = true;
       }
-      if (field.isStored() != fieldSettings.isStored) {
-        fieldSettings.isStored = true;
+      if (field.isStored()) {
+        fieldSetting.stored = true;
       }
-      if (field.isBinary() != fieldSettings.isBinary) {
-        fieldSettings.isBinary = true;
+      if (field.isBinary()) {
+        fieldSetting.isBinary = true;
       }
-      if (field.isTermVectorStored() != fieldSettings.storeTermVector) {
-        fieldSettings.storeTermVector = true;
+      if (field.isTermVectorStored()) {
+        fieldSetting.storeTermVector = true;
       }
-      if (field.isStorePositionWithTermVector() != fieldSettings.storePositionWithTermVector) {
-        fieldSettings.storePositionWithTermVector = true;
+      if (field.isStorePositionWithTermVector()) {
+        fieldSetting.storePositionWithTermVector = true;
       }
-      if (field.isStoreOffsetWithTermVector() != fieldSettings.storeOffsetWithTermVector) {
-        fieldSettings.storeOffsetWithTermVector = true;
+      if (field.isStoreOffsetWithTermVector()) {
+        fieldSetting.storeOffsetWithTermVector = true;
       }
     }
 
@@ -483,7 +504,7 @@ public class InstantiatedIndexWriter {
 
       Field field = it.next();
 
-      FieldSetting fieldSettings = fieldSettingsByFieldName.get(field.name());
+      FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());
 
       if (field.isIndexed()) {
 
@@ -505,15 +526,15 @@ public class InstantiatedIndexWriter {
             next.setTermText(next.termText().intern()); // todo: not sure this needs to be interned?
             tokens.add(next); // the vector will be built on commit.
             next = tokenStream.next();
-            fieldSettings.fieldLength++;
-            if (fieldSettings.fieldLength > maxFieldLength) {
+            fieldSetting.fieldLength++;
+            if (fieldSetting.fieldLength > maxFieldLength) {
               break;
             }
           }
         } else {
           // untokenized
           tokens.add(new Token(field.stringValue().intern(), 0, field.stringValue().length(), "untokenized"));
-          fieldSettings.fieldLength++;
+          fieldSetting.fieldLength++;
         }
       }
 
@@ -528,7 +549,7 @@ public class InstantiatedIndexWriter {
 
     // build term vector, term positions and term offsets
     for (Map.Entry<Field, LinkedList<Token>> eField_Tokens : tokensByField.entrySet()) {
-      FieldSetting fieldSettings = fieldSettingsByFieldName.get(eField_Tokens.getKey().name());
+      FieldSetting fieldSetting = fieldSettingsByFieldName.get(eField_Tokens.getKey().name());
 
       Map<String, TermDocumentInformationFactory> termDocumentInformationFactoryByTermText = termDocumentInformationFactoryByTermTextAndFieldSetting.get(fieldSettingsByFieldName.get(eField_Tokens.getKey().name()));
       if (termDocumentInformationFactoryByTermText == null) {
@@ -539,9 +560,9 @@ public class InstantiatedIndexWriter {
       int lastOffset = 0;
 
       // for each new field, move positions a bunch.
-      if (fieldSettings.position > 0) {
+      if (fieldSetting.position > 0) {
         // todo what if no analyzer set, multiple fields with same name and index without tokenization?
-        fieldSettings.position += analyzer.getPositionIncrementGap(fieldSettings.fieldName);
+        fieldSetting.position += analyzer.getPositionIncrementGap(fieldSetting.fieldName);
       }
 
       for (Token token : eField_Tokens.getValue()) {
@@ -553,26 +574,27 @@ public class InstantiatedIndexWriter {
         }
         //termDocumentInformationFactory.termFrequency++;
 
-        fieldSettings.position += (token.getPositionIncrement() - 1);
-        termDocumentInformationFactory.termPositions.add(fieldSettings.position++);
+        fieldSetting.position += (token.getPositionIncrement() - 1);
+        termDocumentInformationFactory.termPositions.add(fieldSetting.position++);
 
         if (token.getPayload() != null && token.getPayload().length() > 0) {
           termDocumentInformationFactory.payloads.add(token.getPayload().toByteArray());
+          fieldSetting.storePayloads = true;
         } else {
           termDocumentInformationFactory.payloads.add(null);
         }
 
         if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {
 
-          termDocumentInformationFactory.termOffsets.add(new TermVectorOffsetInfo(fieldSettings.offset + token.startOffset(), fieldSettings.offset + token.endOffset()));
-          lastOffset = fieldSettings.offset + token.endOffset();
+          termDocumentInformationFactory.termOffsets.add(new TermVectorOffsetInfo(fieldSetting.offset + token.startOffset(), fieldSetting.offset + token.endOffset()));
+          lastOffset = fieldSetting.offset + token.endOffset();
         }
 
 
       }
 
       if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {
-        fieldSettings.offset = lastOffset + 1;
+        fieldSetting.offset = lastOffset + 1;
       }
 
     }
@@ -631,45 +653,6 @@ public class InstantiatedIndexWriter {
     return analyzer;
   }
 
-
-  private class FieldSetting {
-    private String fieldName;
-
-    private float boost = 1;
-    //private int dimensions = 0; // this is futuristic
-    private int position = 0;
-    private int offset;
-    private int fieldLength = 0;
-
-    private boolean storeTermVector = false;
-    private boolean storeOffsetWithTermVector = false;
-    private boolean storePositionWithTermVector = false;
-    private boolean omitNorms = false;
-    private boolean isTokenized = false;
-
-    private boolean isStored = false;
-    private boolean isIndexed = false;
-    private boolean isBinary = false;
-    private boolean isCompressed = false;
-
-    //private float norm;
-    //private byte encodedNorm;
-
-    public boolean equals(Object o) {
-      if (this == o) return true;
-      if (o == null || getClass() != o.getClass()) return false;
-
-      final FieldSetting that = (FieldSetting) o;
-
-      return fieldName.equals(that.fieldName);
-
-    }
-
-    public int hashCode() {
-      return fieldName.hashCode();
-    }
-  }
-
   private class TermDocumentInformationFactory {
     private LinkedList<byte[]> payloads = new LinkedList<byte[]>();
     private LinkedList<Integer> termPositions = new LinkedList<Integer>();
@@ -677,5 +660,23 @@ public class InstantiatedIndexWriter {
   }
 
 
+  static class FieldSetting extends org.apache.lucene.store.instantiated.FieldSetting { // adds the state this writer tracks per field while a document is being added
+
+    float boost = 1; // multiplied by the boost of every field carrying this name; starting value of the norm
+    int position = 0; // next term position; moved by the analyzer's position increment gap when already > 0
+    int offset; // base added to token start/end offsets; set to last end offset + 1 after a field that stores offsets
+    int fieldLength = 0; // tokens counted for this field (one for an untokenized field); fed to lengthNorm
+
+    boolean omitNorms = false; // set to true as soon as one field with this name omits norms; never reset
+    boolean isBinary = false; // set to true as soon as one field with this name is binary; never reset
+
+    private FieldSetting() { // the caller assigns fieldName after construction
+    }
+
+    private FieldSetting(String fieldName) { // passes the field name on to the superclass constructor
+      super(fieldName);
+    }
+  }
+
 
 }
diff --git a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java
index d89a4fe5fa3..22212470ce1 100644
--- a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java
+++ b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermDocs.java
@@ -121,16 +121,11 @@ public class InstantiatedTermDocs
     } else {
       return true;
     }
-
-
   }
 
   /**
    * Does nothing
    */
   public void close() {
-
   }
-
-
 }
diff --git a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java
index 742bd3b9e4e..4306466a363 100644
--- a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java
+++ b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermEnum.java
@@ -61,7 +61,7 @@ public class InstantiatedTermEnum
    * Returns the current Term in the enumeration.
    */
   public Term term() {
-    return /*term == null ? null :*/ term.getTerm();
+    return term == null ? null : term.getTerm();
   }
 
   /**
diff --git a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/package.html b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/package.html
index 1785ee97069..4cecd146c41 100644
--- a/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/package.html
+++ b/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/package.html
@@ -70,9 +70,10 @@
 <h2>Caveats</h2>
 <ul>
   <li>No locks! Consider using InstantiatedIndex as if it was immutable.</li>
-  <li>No documents with fields containing readers!</li>
-  <li>Only FieldOption.All allowed by IndexReader#getFieldNames(FieldOption).</li>
+  <li>No documents with fields containing readers.</li>
   <li>No field selection when retrieving documents, as all stored field are available in memory.</li>
+  <li>Any document returned must be cloned if it is to be touched.</li>
+  <li>Norms array returned must not be touched.</li>
 </ul>
 
 <h2>Use cases</h2>
diff --git a/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java b/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
index f331abb9ad6..a0ec2485424 100644
--- a/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
+++ b/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
@@ -47,7 +47,7 @@ public class TestIndicesEquals extends TestCase {
 
     // create dir data
     IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(), true);
-    for (int i = 0; i < 5; i++) {
+    for (int i = 0; i < 20; i++) {
       Document document = new Document();
       assembleDocument(document, i);
       indexWriter.addDocument(document);
@@ -59,9 +59,10 @@ public class TestIndicesEquals extends TestCase {
     InstantiatedIndex ii = new InstantiatedIndex(ir);
     ir.close();
 
-    testEquals(dir, ii);
+    testEqualBehaviour(dir, ii);
   }
 
+
   public void testInstantiatedIndexWriter() throws Exception {
 
 
@@ -86,7 +87,7 @@ public class TestIndicesEquals extends TestCase {
     }
     instantiatedIndexWriter.close();
 
-    testEquals(dir, ii);
+    testEqualBehaviour(dir, ii);
 
     testTermDocs(dir, ii);
 
@@ -186,6 +187,25 @@ public class TestIndicesEquals extends TestCase {
    * @param testIndex    the index that is supposed to equals the apriori index.
    * @throws Exception
    */
+  protected void testEqualBehaviour(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception { // runs testEquals before and after deleting the same documents from both indices
+
+    testEquals(aprioriIndex,  testIndex); // baseline comparison before anything is deleted
+
+    // delete documents 3 and 8 from the apriori (directory) index
+    IndexReader ir = IndexReader.open(aprioriIndex);
+    ir.deleteDocument(3);
+    ir.deleteDocument(8);
+    ir.close();
+
+    ir = testIndex.indexReaderFactory(); // delete the same document numbers from the instantiated index
+    ir.deleteDocument(3);
+    ir.deleteDocument(8);
+    ir.close();
+
+    // make sure the two indices are still equal after the deletions
+    testEquals(aprioriIndex,  testIndex);
+  }
+
   protected void testEquals(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception {
 
     IndexReader aprioriReader = IndexReader.open(aprioriIndex);
@@ -193,6 +213,17 @@ public class TestIndicesEquals extends TestCase {
 
     assertEquals(aprioriReader.numDocs(), testReader.numDocs());
 
+    // assert field options
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED), testReader.getFieldNames(IndexReader.FieldOption.INDEXED));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), testReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.UNINDEXED), testReader.getFieldNames(IndexReader.FieldOption.UNINDEXED));
+
     for (Object field : aprioriReader.getFieldNames(IndexReader.FieldOption.ALL)) {
 
       // test norms as used by normal use