From 5e935d710604a5d03d20e3aebe265e1c799e2f2c Mon Sep 17 00:00:00 2001
From: Alan Woodward
Date: Mon, 25 Jan 2016 16:10:49 +0000
Subject: [PATCH] LUCENE-6992: Allow adding IndexableField to MemoryIndex

---
 lucene/CHANGES.txt                            |   3 +
 .../lucene/index/memory/MemoryIndex.java      | 107 +++++++++++++-----
 .../lucene/index/memory/TestMemoryIndex.java  |  27 +++++
 3 files changed, 106 insertions(+), 31 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4b9b4662c85..36a667495ef 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -257,6 +257,9 @@ Other
   Locale#forLanguageTag() and Locale#toString() were placed on list of
   forbidden signatures. (Uwe Schindler, Robert Muir)
 
+* LUCENE-6992: You can now add IndexableFields directly to a MemoryIndex,
+  and create a MemoryIndex from a lucene Document. (Alan Woodward)
+
 ======================= Lucene 5.4.1 =======================
 
 Bug Fixes
diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index dec42d522f3..e5e461b871c 100644
--- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -33,46 +33,18 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
-import org.apache.lucene.index.BinaryDocValues;
-import org.apache.lucene.index.PointValues;
-import org.apache.lucene.index.DocValuesType;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.FieldInvertState;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.index.OrdTermState;
-import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.index.SortedNumericDocValues;
-import org.apache.lucene.index.SortedSetDocValues;
-import org.apache.lucene.index.StoredFieldVisitor;
-import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.*;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.SimpleCollector;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.ByteBlockPool;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefArray;
-import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.*;
 import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
-import org.apache.lucene.util.BytesRefHash;
-import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.IntBlockPool.SliceReader;
 import org.apache.lucene.util.IntBlockPool.SliceWriter;
-import org.apache.lucene.util.IntBlockPool;
-import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.RecyclingByteBlockAllocator;
-import org.apache.lucene.util.RecyclingIntBlockAllocator;
 
 /**
  * High-performance single-document main memory Apache Lucene fulltext search index.
@@ -288,6 +260,46 @@ public class MemoryIndex {
     addField(fieldName, stream, 1.0f, analyzer.getPositionIncrementGap(fieldName), analyzer.getOffsetGap(fieldName));
   }
 
+  /**
+   * Builds a MemoryIndex from a lucene {@link Document} using an analyzer
+   *
+   * @param document the document to index
+   * @param analyzer the analyzer to use
+   * @return a MemoryIndex
+   */
+  public static MemoryIndex fromDocument(Document document, Analyzer analyzer) {
+    return fromDocument(document, analyzer, false, false, 0);
+  }
+
+  /**
+   * Builds a MemoryIndex from a lucene {@link Document} using an analyzer
+   * @param document the document to index
+   * @param analyzer the analyzer to use
+   * @param storeOffsets true if offsets should be stored
+   * @param storePayloads true if payloads should be stored
+   * @return a MemoryIndex
+   */
+  public static MemoryIndex fromDocument(Document document, Analyzer analyzer, boolean storeOffsets, boolean storePayloads) {
+    return fromDocument(document, analyzer, storeOffsets, storePayloads, 0);
+  }
+
+  /**
+   * Builds a MemoryIndex from a lucene {@link Document} using an analyzer
+   * @param document the document to index
+   * @param analyzer the analyzer to use
+   * @param storeOffsets true if offsets should be stored
+   * @param storePayloads true if payloads should be stored
+   * @param maxReusedBytes the number of bytes that should remain in the internal memory pools after {@link #reset()} is called
+   * @return a MemoryIndex
+   */
+  public static MemoryIndex fromDocument(Document document, Analyzer analyzer, boolean storeOffsets, boolean storePayloads, long maxReusedBytes) {
+    MemoryIndex mi = new MemoryIndex(storeOffsets, storePayloads, maxReusedBytes);
+    for (IndexableField field : document) {
+      mi.addField(field, analyzer);
+    }
+    return mi;
+  }
+
   /**
    * Convenience method; Creates and returns a token stream that generates a
    * token for each keyword in the given collection, "as is", without any
@@ -339,6 +351,39 @@ public class MemoryIndex {
   public void addField(String fieldName, TokenStream stream) {
     addField(fieldName, stream, 1.0f);
   }
+
+  /**
+   * Adds a lucene {@link IndexableField} to the MemoryIndex using the provided analyzer
+   * @param field the field to add
+   * @param analyzer the analyzer to use for term analysis
+   * @throws IllegalArgumentException if the field is a DocValues or Point field, as these
+   *                                  structures are not supported by MemoryIndex
+   */
+  public void addField(IndexableField field, Analyzer analyzer) {
+    addField(field, analyzer, 1.0f);
+  }
+
+  /**
+   * Adds a lucene {@link IndexableField} to the MemoryIndex using the provided analyzer
+   * @param field the field to add
+   * @param analyzer the analyzer to use for term analysis
+   * @param boost a field boost
+   * @throws IllegalArgumentException if the field is a DocValues or Point field, as these
+   *                                  structures are not supported by MemoryIndex
+   */
+  public void addField(IndexableField field, Analyzer analyzer, float boost) {
+    if (field.fieldType().docValuesType() != DocValuesType.NONE)
+      throw new IllegalArgumentException("MemoryIndex does not support DocValues fields");
+    if (field.fieldType().pointDimensionCount() != 0)
+      throw new IllegalArgumentException("MemoryIndex does not support Points");
+    if (analyzer == null) {
+      addField(field.name(), field.tokenStream(null, null), boost);
+    }
+    else {
+      addField(field.name(), field.tokenStream(analyzer, null), boost,
+          analyzer.getPositionIncrementGap(field.name()), analyzer.getOffsetGap(field.name()));
+    }
+  }
 
   /**
    * Iterates over the given token stream and adds the resulting terms to the index;
diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
index 9011231ae63..6f889048e57 100644
--- a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
+++ b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
@@ -22,12 +22,17 @@ import java.io.IOException;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockPayloadAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.similarities.BM25Similarity;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
@@ -156,5 +161,27 @@ public class TestMemoryIndex extends LuceneTestCase {
     TestUtil.checkReader(reader);
   }
 
+  @Test
+  public void testBuildFromDocument() {
+
+    Document doc = new Document();
+    doc.add(new TextField("field1", "some text", Field.Store.NO));
+    doc.add(new TextField("field1", "some more text", Field.Store.NO));
+    doc.add(new StringField("field2", "untokenized text", Field.Store.NO));
+
+    analyzer.setPositionIncrementGap(100);
+
+    MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
+
+    assertThat(mi.search(new TermQuery(new Term("field1", "text"))), not(0.0f));
+    assertThat(mi.search(new TermQuery(new Term("field2", "text"))), is(0.0f));
+    assertThat(mi.search(new TermQuery(new Term("field2", "untokenized text"))), not(0.0f));
+
+    assertThat(mi.search(new PhraseQuery("field1", "some", "more", "text")), not(0.0f));
+    assertThat(mi.search(new PhraseQuery("field1", "some", "text")), not(0.0f));
+    assertThat(mi.search(new PhraseQuery("field1", "text", "some")), is(0.0f));
+
+  }
+
 }
 
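
Usage sketch (illustrative only, not part of the patch above): the snippet below exercises the two entry points this change adds, MemoryIndex.fromDocument() and addField(IndexableField, Analyzer). It assumes StandardAnalyzer from lucene-analyzers-common is on the classpath alongside lucene-memory; the example class name and field names are made up.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;

public class MemoryIndexFromDocumentExample {

  public static void main(String[] args) {
    Analyzer analyzer = new StandardAnalyzer();

    // Build a single-document in-memory index from a lucene Document in one call.
    Document doc = new Document();
    doc.add(new TextField("body", "memory index example text", Field.Store.NO));
    MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);

    // search() returns a relevance score; 0.0f means the query did not match.
    float hit = mi.search(new TermQuery(new Term("body", "example")));
    float miss = mi.search(new TermQuery(new Term("body", "absent")));
    System.out.println("hit=" + hit + " miss=" + miss);

    // Fields can also be added one at a time with the new addField overload;
    // DocValues and Point fields are rejected with IllegalArgumentException.
    MemoryIndex incremental = new MemoryIndex();
    incremental.addField(new TextField("title", "another example", Field.Store.NO), analyzer);
    System.out.println("title match=" + incremental.search(new TermQuery(new Term("title", "another"))));
  }
}

As the patch shows, fromDocument() is just a convenience wrapper that loops over the Document's IndexableFields and calls the per-field addField(field, analyzer) for each one.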