mirror of https://github.com/apache/lucene.git

LUCENE-6034: Simplify MemoryIndex state via TreeMap<Info> etc.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1643298 13f79535-47bb-0310-9956-ffa450edef68

parent 83f0d3578c
commit e460ce328c
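In short: the patch collapses three pieces of MemoryIndex state (the HashMap fields map, the lazily computed sortedFields entry array, and the separate fieldInfos map) into a single SortedMap<String,Info> backed by a TreeMap, whose iteration order is already ascending by field name. Each Info now carries its own FieldInfo and lazily computes and caches its norms, which makes the per-reader norms map, the custom termComparator, the sortFields()/sort()/getInfo() helpers, and the MemoryIndexNormDocValues class unnecessary; all of them are deleted below.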
MemoryIndex.java
@@ -18,13 +18,12 @@ package org.apache.lucene.index.memory;
  */
 
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Comparator;
-import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
-import java.util.NoSuchElementException;
+import java.util.SortedMap;
+import java.util.TreeMap;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
@@ -61,17 +60,16 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
 import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
 import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.IntBlockPool;
 import org.apache.lucene.util.IntBlockPool.SliceReader;
 import org.apache.lucene.util.IntBlockPool.SliceWriter;
-import org.apache.lucene.util.IntBlockPool;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.RecyclingByteBlockAllocator;
 import org.apache.lucene.util.RecyclingIntBlockAllocator;
-
 
 /**
  * High-performance single-document main memory Apache Lucene fulltext search index.
  * <p>
@@ -190,10 +188,7 @@ import org.apache.lucene.util.RecyclingIntBlockAllocator;
 public class MemoryIndex {
 
   /** info for each field: Map<String fieldName, Info field> */
-  private final HashMap<String,Info> fields = new HashMap<>();
-
-  /** fields sorted ascending by fieldName; lazily computed on demand */
-  private transient Map.Entry<String,Info>[] sortedFields;
+  private final SortedMap<String,Info> fields = new TreeMap<>();
 
   private final boolean storeOffsets;
 
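A minimal sketch, not part of the patch, of the TreeMap property the new fields declaration relies on: a TreeMap keeps its keys permanently sorted, so the sorted view that sortedFields used to provide now falls out of plain iteration.

import java.util.SortedMap;
import java.util.TreeMap;

class SortedIterationDemo {
  public static void main(String[] args) {
    SortedMap<String, Integer> fields = new TreeMap<>();
    fields.put("title", 1);
    fields.put("author", 2);
    fields.put("body", 3);
    // Iteration order is ascending by key, with no explicit sort step.
    System.out.println(fields.keySet()); // prints [author, body, title]
  }
}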
@@ -204,29 +199,12 @@
   // private final IntBlockPool.SliceReader postingsReader;
   private final IntBlockPool.SliceWriter postingsWriter;
 
-  private HashMap<String,FieldInfo> fieldInfos = new HashMap<>();
-
   private Counter bytesUsed;
 
   private boolean frozen = false;
 
   private Similarity normSimilarity = IndexSearcher.getDefaultSimilarity();
 
-  /**
-   * Sorts term entries into ascending order; also works for
-   * Arrays.binarySearch() and Arrays.sort()
-   */
-  private static final Comparator<Object> termComparator = new Comparator<Object>() {
-    @Override
-    @SuppressWarnings({"unchecked","rawtypes"})
-    public int compare(Object o1, Object o2) {
-      if (o1 instanceof Map.Entry<?,?>) o1 = ((Map.Entry<?,?>) o1).getKey();
-      if (o2 instanceof Map.Entry<?,?>) o2 = ((Map.Entry<?,?>) o2).getKey();
-      if (o1 == o2) return 0;
-      return ((Comparable) o1).compareTo((Comparable) o2);
-    }
-  };
-
   /**
    * Constructs an empty instance.
    */
@@ -247,7 +225,6 @@
    */
  public MemoryIndex(boolean storeOffsets) {
    this(storeOffsets, 0);
-
  }
 
  /**
@@ -429,10 +406,12 @@
       int pos = -1;
       final BytesRefHash terms;
       final SliceByteStartArray sliceArray;
-      Info info = null;
+      Info info;
       long sumTotalTermFreq = 0;
       int offset = 0;
+      FieldInfo fieldInfo;
       if ((info = fields.get(fieldName)) != null) {
+        fieldInfo = info.fieldInfo;
         numTokens = info.numTokens;
         numOverlapTokens = info.numOverlapTokens;
         pos = info.lastPosition + positionIncrementGap;
@@ -442,16 +421,13 @@
         sliceArray = info.sliceArray;
         sumTotalTermFreq = info.sumTotalTermFreq;
       } else {
+        fieldInfo = new FieldInfo(fieldName, fields.size(), false, false, false,
+            this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
+            DocValuesType.NONE, -1, null);
         sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
         terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
       }
 
-      if (!fieldInfos.containsKey(fieldName)) {
-        fieldInfos.put(fieldName,
-            new FieldInfo(fieldName, fieldInfos.size(), false, false, false,
-                this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
-                DocValuesType.NONE, -1, null));
-      }
       TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
       PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
       OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
@@ -488,8 +464,7 @@
 
       // ensure infos.numTokens > 0 invariant; needed for correct operation of terms()
       if (numTokens > 0) {
-        fields.put(fieldName, new Info(terms, sliceArray, numTokens, numOverlapTokens, boost, pos, offsetAtt.endOffset() + offset, sumTotalTermFreq));
-        sortedFields = null; // invalidate sorted view, if any
+        fields.put(fieldName, new Info(fieldInfo, terms, sliceArray, numTokens, numOverlapTokens, boost, pos, offsetAtt.endOffset() + offset, sumTotalTermFreq));
       }
     } catch (Exception e) { // can never happen
       throw new RuntimeException(e);
@@ -510,7 +485,13 @@
  public void setSimilarity(Similarity similarity) {
    if (frozen)
      throw new IllegalArgumentException("Cannot set Similarity when MemoryIndex is frozen");
+    if (this.normSimilarity == similarity)
+      return;
    this.normSimilarity = similarity;
+    //invalidate any cached norms that may exist
+    for (Info info : fields.values()) {
+      info.norms = null;
+    }
  }
 
  /**
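The new guard makes setSimilarity a no-op when the same Similarity instance is passed again; when it actually changes, each Info's cached norms are nulled so they are recomputed lazily on the next access.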
@@ -528,17 +509,16 @@
 
  /**
   * Prepares the MemoryIndex for querying in a non-lazy way.
-   *
+   * <p>
   * After calling this you can query the MemoryIndex from multiple threads, but you
   * cannot subsequently add new data.
   */
  public void freeze() {
    this.frozen = true;
-    sortFields();
-    for (Map.Entry<String,Info> info : sortedFields) {
-      info.getValue().sortTerms();
+    for (Info info : fields.values()) {
+      info.sortTerms();
+      info.getNormDocValues();//lazily computed
    }
-    calculateNormValues();
  }
 
  /**
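A hedged usage sketch of the freeze() contract stated in the Javadoc above; the field name, sample text, analyzer choice, and helper class are illustrative, not from the patch:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.Query;

class FreezeSketch {
  // Hypothetical helper: builds a one-document index, freezes it,
  // and scores a query against it.
  static float scoreOnce(Query query) {
    MemoryIndex mi = new MemoryIndex();
    mi.addField("content", "some sample text", new StandardAnalyzer());
    mi.freeze(); // eagerly sorts each field's terms and computes its norms
    // After freeze(), the index may be queried from multiple threads,
    // but no further fields can be added.
    return mi.search(query);
  }
}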
@@ -589,7 +569,7 @@
   * NOT close the index reader!!! This avoids all sorts of
   * unnecessary baggage and locking in the Lucene IndexReader
   * superclass, all of which is completely unnecessary for this main
-   * memory index data structure without thread-safety claims.
+   * memory index data structure.
   *
   * Wishing IndexReader would be an interface...
   *
@@ -600,26 +580,6 @@
    }
  }
 
-  /** sorts into ascending order (on demand), reusing memory along the way */
-  private void sortFields() {
-    if (sortedFields == null) sortedFields = sort(fields);
-  }
-
-  /** returns a view of the given map's entries, sorted ascending by key */
-  private static <K,V> Map.Entry<K,V>[] sort(HashMap<K,V> map) {
-    int size = map.size();
-    @SuppressWarnings("unchecked")
-    Map.Entry<K,V>[] entries = new Map.Entry[size];
-
-    Iterator<Map.Entry<K,V>> iter = map.entrySet().iterator();
-    for (int i=0; i < size; i++) {
-      entries[i] = iter.next();
-    }
-
-    if (size > 1) ArrayUtil.introSort(entries, termComparator);
-    return entries;
-  }
-
  /**
   * Returns a String representation of the index data for debugging purposes.
   *
@@ -628,12 +588,10 @@
  @Override
  public String toString() {
    StringBuilder result = new StringBuilder(256);
-    sortFields();
    int sumPositions = 0;
    int sumTerms = 0;
    final BytesRef spare = new BytesRef();
-    for (int i=0; i < sortedFields.length; i++) {
-      Map.Entry<String,Info> entry = sortedFields[i];
+    for (Map.Entry<String, Info> entry : fields.entrySet()) {
      String fieldName = entry.getKey();
      Info info = entry.getValue();
      info.sortTerms();
@@ -676,23 +634,28 @@
      sumTerms += info.terms.size();
    }
 
-    result.append("\nfields=" + sortedFields.length);
+    result.append("\nfields=" + fields.size());
    result.append(", terms=" + sumTerms);
    result.append(", positions=" + sumPositions);
    return result.toString();
  }
 
  /**
-   * Index data structure for a field; Contains the tokenized term texts and
+   * Index data structure for a field; contains the tokenized term texts and
   * their positions.
   */
-  private static final class Info {
+  private final class Info {
 
+    private final FieldInfo fieldInfo;
+
+    /** The norms for this field; computed on demand. */
+    private transient NumericDocValues norms;
+
    /**
     * Term strings and their positions for this field: Map <String
     * termText, ArrayIntList positions>
     */
-    private final BytesRefHash terms;
+    private final BytesRefHash terms; // note unfortunate variable name clash with Terms type
 
    private final SliceByteStartArray sliceArray;
 
@@ -716,7 +679,8 @@
    /** the last offset encountered in this field for multi field support*/
    private final int lastOffset;
 
-    public Info(BytesRefHash terms, SliceByteStartArray sliceArray, int numTokens, int numOverlapTokens, float boost, int lastPosition, int lastOffset, long sumTotalTermFreq) {
+    public Info(FieldInfo fieldInfo, BytesRefHash terms, SliceByteStartArray sliceArray, int numTokens, int numOverlapTokens, float boost, int lastPosition, int lastOffset, long sumTotalTermFreq) {
+      this.fieldInfo = fieldInfo;
      this.terms = terms;
      this.sliceArray = sliceArray;
      this.numTokens = numTokens;
@@ -727,10 +691,6 @@
      this.lastOffset = lastOffset;
    }
 
-    public long getSumTotalTermFreq() {
-      return sumTotalTermFreq;
-    }
-
    /**
     * Sorts hashed terms into ascending order, reusing memory along the
     * way. Note that sorting is lazily delayed until required (often it's
@@ -740,12 +700,30 @@
     * apart from more sophisticated Tries / prefix trees).
     */
    public void sortTerms() {
-      if (sortedTerms == null)
+      if (sortedTerms == null) {
        sortedTerms = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
+      }
    }
 
-    public float getBoost() {
-      return boost;
+    public NumericDocValues getNormDocValues() {
+      if (norms == null) {
+        FieldInvertState invertState = new FieldInvertState(fieldInfo.name, fieldInfo.number,
+            numTokens, numOverlapTokens, 0, boost);
+        final long value = normSimilarity.computeNorm(invertState);
+        if (DEBUG) System.err.println("MemoryIndexReader.norms: " + fieldInfo.name + ":" + value + ":" + numTokens);
+        norms = new NumericDocValues() {
+
+          @Override
+          public long get(int docID) {
+            if (docID != 0)
+              throw new IndexOutOfBoundsException();
+            else
+              return value;
+          }
+
+        };
+      }
+      return norms;
    }
  }
 
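getNormDocValues() memoizes the computed norm in Info.norms, so together with the invalidation in setSimilarity above, each field's norm is computed at most once per Similarity. Because a MemoryIndex always holds exactly one document, the anonymous NumericDocValues only answers docID 0 and throws for anything else.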
@@ -777,10 +755,6 @@
      return fields.get(fieldName);
    }
 
-    private Info getInfo(int pos) {
-      return sortedFields[pos].getValue();
-    }
-
    @Override
    public Bits getLiveDocs() {
      return null;
@@ -788,7 +762,12 @@
 
    @Override
    public FieldInfos getFieldInfos() {
-      return new FieldInfos(fieldInfos.values().toArray(new FieldInfo[fieldInfos.size()]));
+      FieldInfo[] fieldInfos = new FieldInfo[fields.size()];
+      int i = 0;
+      for (Info info : fields.values()) {
+        fieldInfos[i++] = info.fieldInfo;
+      }
+      return new FieldInfos(fieldInfos);
    }
 
    @Override
@@ -829,38 +808,14 @@
  private class MemoryFields extends Fields {
    @Override
    public Iterator<String> iterator() {
-      return new Iterator<String>() {
-        int upto = -1;
-
-        @Override
-        public String next() {
-          upto++;
-          if (upto >= sortedFields.length) {
-            throw new NoSuchElementException();
-          }
-          return sortedFields[upto].getKey();
-        }
-
-        @Override
-        public boolean hasNext() {
-          return upto+1 < sortedFields.length;
-        }
-
-        @Override
-        public void remove() {
-          throw new UnsupportedOperationException();
-        }
-      };
+      return fields.keySet().iterator();
    }
 
    @Override
    public Terms terms(final String field) {
-      int i = Arrays.binarySearch(sortedFields, field, termComparator);
-      if (i < 0) {
+      final Info info = fields.get(field);
+      if (info == null)
        return null;
-      } else {
-        final Info info = getInfo(i);
-        info.sortTerms();
 
      return new Terms() {
        @Override
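Both MemoryFields methods simplify for the same reason: keySet().iterator() of the TreeMap already yields field names in ascending order, replacing the hand-rolled iterator over sortedFields, and terms(field) becomes a direct map lookup instead of a binary search over the sorted entry array.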
@@ -875,7 +830,7 @@
 
        @Override
        public long getSumTotalTermFreq() {
-          return info.getSumTotalTermFreq();
+          return info.sumTotalTermFreq;
        }
 
        @Override
@@ -886,7 +841,7 @@
 
        @Override
        public int getDocCount() {
-          return info.terms.size() > 0 ? 1 : 0;
+          return size() > 0 ? 1 : 0;
        }
 
        @Override
@@ -910,17 +865,15 @@
        }
      };
    }
-    }
 
    @Override
    public int size() {
-      return sortedFields.length;
+      return fields.size();
    }
  }
 
  @Override
  public Fields fields() {
-    sortFields();
    return new MemoryFields();
  }
 
@@ -1208,44 +1161,20 @@
 
    @Override
    public NumericDocValues getNormValues(String field) {
-      if (norms == null)
-        return calculateFieldNormValue(field);
-      return norms.get(field);
-    }
-
-  }
-
-  private Map<String, NumericDocValues> norms = null;
-
-  private NumericDocValues calculateFieldNormValue(String field) {
-    FieldInfo fieldInfo = fieldInfos.get(field);
-    if (fieldInfo == null)
-      return null;
      Info info = fields.get(field);
-      int numTokens = info != null ? info.numTokens : 0;
-      int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
-      float boost = info != null ? info.getBoost() : 1.0f;
-      FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
-      long value = normSimilarity.computeNorm(invertState);
-      if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
-      return new MemoryIndexNormDocValues(value);
+      if (info == null) {
+        return null;
+      }
+      return info.getNormDocValues();
    }
 
-    private void calculateNormValues() {
-      norms = new HashMap<>();
-      for (String field : fieldInfos.keySet()) {
-        norms.put(field, calculateFieldNormValue(field));
-      }
  }
 
  /**
   * Resets the {@link MemoryIndex} to its initial state and recycles all internal buffers.
   */
  public void reset() {
-    this.fieldInfos.clear();
-    this.fields.clear();
-    this.sortedFields = null;
-    this.norms = null;
+    fields.clear();
    this.normSimilarity = IndexSearcher.getDefaultSimilarity();
    byteBlockPool.reset(false, false); // no need to 0-fill the buffers
    intBlockPool.reset(true, false); // here must must 0-fill since we use slices
MemoryIndexNormDocValues.java (file deleted)
@@ -1,39 +0,0 @@
-package org.apache.lucene.index.memory;
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.NumericDocValues;
-
-/**
- *
- * @lucene.internal
- */
-class MemoryIndexNormDocValues extends NumericDocValues {
-  private final long value;
-  public MemoryIndexNormDocValues(long value) {
-    this.value = value;
-  }
-
-  @Override
-  public long get(int docID) {
-    if (docID != 0)
-      throw new IndexOutOfBoundsException();
-    else
-      return value;
-  }
-
-}
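Deleting this class loses no behavior: its get(int docID) body survives verbatim as the anonymous NumericDocValues returned by Info.getNormDocValues() above.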
TestMemoryIndex.java
@@ -100,7 +100,7 @@ public class TestMemoryIndex extends LuceneTestCase {
    LeafReader reader = (LeafReader) searcher.getIndexReader();
    float n1 = reader.getNormValues("f1").get(0);
 
-    // Norms aren't cached, so we can change the Similarity
+    // Norms are re-computed when we change the Similarity
    mi.setSimilarity(new DefaultSimilarity() {
      @Override
      public float lengthNorm(FieldInvertState state) {
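The comment flips because the caching story changed: norms are now cached per field inside Info, but setSimilarity invalidates them, so the test still observes a freshly computed norm after installing the custom DefaultSimilarity.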