LUCENE-7679: MemoryIndex should take into account omitNorms

This commit is contained in:
Alan Woodward 2017-02-07 10:41:20 +00:00
parent aa20136bb1
commit 6696eafaae
4 changed files with 68 additions and 33 deletions

View File

@ -115,6 +115,9 @@ Bug Fixes
* LUCENE-7676: Fixed FilterCodecReader to override more super-class methods.
Also added TestFilterCodecReader class. (Christine Poerschke)
* LUCENE-7679: MemoryIndex was ignoring omitNorms settings on passed-in
IndexableFields. (Alan Woodward)
Improvements
* LUCENE-7055: Added Weight#scorerSupplier, which allows to estimate the cost

View File

@ -203,7 +203,8 @@ public final class FieldInfo {
return pointNumBytes;
}
void setDocValuesType(DocValuesType type) {
/** Record that this field is indexed with docvalues, with the specified type */
public void setDocValuesType(DocValuesType type) {
if (type == null) {
throw new NullPointerException("DocValuesType must not be null (field: \"" + name + "\")");
}

View File

@ -34,6 +34,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@ -195,6 +196,8 @@ public class MemoryIndex {
private Similarity normSimilarity = IndexSearcher.getDefaultSimilarity();
private FieldType defaultFieldType = new FieldType();
/**
* Constructs an empty instance that will not store offsets or payloads.
*/
@ -236,6 +239,9 @@ public class MemoryIndex {
MemoryIndex(boolean storeOffsets, boolean storePayloads, long maxReusedBytes) {
this.storeOffsets = storeOffsets;
this.storePayloads = storePayloads;
this.defaultFieldType.setIndexOptions(storeOffsets ?
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
this.defaultFieldType.setStoreTermVectors(true);
this.bytesUsed = Counter.newCounter();
final int maxBufferedByteBlocks = (int)((maxReusedBytes/2) / ByteBlockPool.BYTE_BLOCK_SIZE );
final int maxBufferedIntBlocks = (int) ((maxReusedBytes - (maxBufferedByteBlocks*ByteBlockPool.BYTE_BLOCK_SIZE))/(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES));
@ -269,8 +275,8 @@ public class MemoryIndex {
throw new IllegalArgumentException("analyzer must not be null");
TokenStream stream = analyzer.tokenStream(fieldName, text);
addField(fieldName, stream, 1.0f, analyzer.getPositionIncrementGap(fieldName), analyzer.getOffsetGap(fieldName),
DocValuesType.NONE, null, 0, 0, null);
storeTerms(getInfo(fieldName, defaultFieldType), stream, 1.0f,
analyzer.getPositionIncrementGap(fieldName), analyzer.getOffsetGap(fieldName));
}
/**
@ -385,10 +391,11 @@ public class MemoryIndex {
* @param field the field to add
* @param analyzer the analyzer to use for term analysis
* @param boost a field boost
* @throws IllegalArgumentException if the field is a DocValues or Point field, as these
* structures are not supported by MemoryIndex
*/
public void addField(IndexableField field, Analyzer analyzer, float boost) {
Info info = getInfo(field.name(), field.fieldType());
int offsetGap;
TokenStream tokenStream;
int positionIncrementGap;
@ -401,6 +408,9 @@ public class MemoryIndex {
tokenStream = field.tokenStream(null, null);
positionIncrementGap = 0;
}
if (tokenStream != null) {
storeTerms(info, tokenStream, boost, positionIncrementGap, offsetGap);
}
DocValuesType docValuesType = field.fieldType().docValuesType();
Object docValuesValue;
@ -420,12 +430,14 @@ public class MemoryIndex {
default:
throw new UnsupportedOperationException("unknown doc values type [" + docValuesType + "]");
}
BytesRef pointValue = null;
if (field.fieldType().pointDimensionCount() > 0) {
pointValue = field.binaryValue();
if (docValuesValue != null) {
storeDocValues(info, docValuesType, docValuesValue);
}
addField(field.name(), tokenStream, boost, positionIncrementGap, offsetGap, docValuesType, docValuesValue,
field.fieldType().pointDimensionCount(), field.fieldType().pointNumBytes(), pointValue);
if (field.fieldType().pointDimensionCount() > 0) {
storePointValues(info, field.binaryValue());
}
}
/**
@ -494,42 +506,40 @@ public class MemoryIndex {
* @see org.apache.lucene.document.Field#setBoost(float)
*/
/**
 * Iterates over the given token stream and adds the resulting terms to the index.
 *
 * The field is registered (or looked up) via {@code getInfo} using the index-wide
 * {@code defaultFieldType}, then its terms are folded into the inverted data via
 * {@code storeTerms}.
 *
 * @param fieldName             the name of the field to add the tokens to
 * @param tokenStream           the token stream to read terms from
 * @param boost                 a per-field boost factor; storeTerms rejects values &lt;= 0
 * @param positionIncrementGap  position gap inserted between multiple values of the same field
 * @param offsetGap             offset gap inserted between multiple values of the same field
 */
public void addField(String fieldName, TokenStream tokenStream, float boost, int positionIncrementGap, int offsetGap) {
  Info info = getInfo(fieldName, defaultFieldType);
  storeTerms(info, tokenStream, boost, positionIncrementGap, offsetGap);
}
private void addField(String fieldName, TokenStream tokenStream, float boost, int positionIncrementGap, int offsetGap,
DocValuesType docValuesType, Object docValuesValue, int pointDimensionCount, int pointNumBytes,
BytesRef pointValue) {
private Info getInfo(String fieldName, IndexableFieldType fieldType) {
if (frozen) {
throw new IllegalArgumentException("Cannot call addField() when MemoryIndex is frozen");
}
if (fieldName == null) {
throw new IllegalArgumentException("fieldName must not be null");
}
if (boost <= 0.0f) {
throw new IllegalArgumentException("boost factor must be greater than 0.0");
}
Info info = fields.get(fieldName);
if (info == null) {
IndexOptions indexOptions = storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
FieldInfo fieldInfo = new FieldInfo(fieldName, fields.size(), true, false, storePayloads, indexOptions, docValuesType, -1, Collections.emptyMap(), 0, 0);
fields.put(fieldName, info = new Info(fieldInfo, byteBlockPool));
fields.put(fieldName, info = new Info(createFieldInfo(fieldName, fields.size(), fieldType), byteBlockPool));
}
if (pointDimensionCount > 0) {
storePointValues(info, pointDimensionCount, pointNumBytes, pointValue);
if (fieldType.pointDimensionCount() != info.fieldInfo.getPointDimensionCount()) {
if (fieldType.pointDimensionCount() > 0)
info.fieldInfo.setPointDimensions(fieldType.pointDimensionCount(), fieldType.pointNumBytes());
}
if (docValuesType != DocValuesType.NONE) {
storeDocValues(info, docValuesType, docValuesValue);
}
if (tokenStream != null) {
storeTerms(info, tokenStream, boost, positionIncrementGap, offsetGap);
if (fieldType.docValuesType() != info.fieldInfo.getDocValuesType()) {
if (fieldType.docValuesType() != DocValuesType.NONE)
info.fieldInfo.setDocValuesType(fieldType.docValuesType());
}
return info;
}
private void storePointValues(Info info, int pointDimensionCount, int pointNumBytes, BytesRef pointValue) {
info.fieldInfo.setPointDimensions(pointDimensionCount, pointNumBytes);
/**
 * Builds a fresh {@code FieldInfo} for a field that is being added for the first time.
 *
 * Term vectors, norms omission, docvalues type and point settings are copied from
 * the caller-supplied field type, so per-field settings such as {@code omitNorms}
 * are respected. Index options, however, are dictated by this MemoryIndex's
 * offset-storing mode rather than by the field type.
 *
 * @param fieldName the field's name
 * @param ord       the ordinal (field number) to assign, typically {@code fields.size()}
 * @param fieldType source of the per-field indexing settings
 * @return a new {@code FieldInfo} describing the field
 */
private FieldInfo createFieldInfo(String fieldName, int ord, IndexableFieldType fieldType) {
  // Offsets availability is a property of the whole MemoryIndex, not of the field type.
  IndexOptions opts = storeOffsets
      ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
      : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  return new FieldInfo(fieldName, ord, fieldType.storeTermVectors(), fieldType.omitNorms(), storePayloads,
      opts, fieldType.docValuesType(), -1, Collections.emptyMap(),
      fieldType.pointDimensionCount(), fieldType.pointNumBytes());
}
private void storePointValues(Info info, BytesRef pointValue) {
if (info.pointValues == null) {
info.pointValues = new BytesRef[4];
}
@ -591,6 +601,11 @@ public class MemoryIndex {
}
private void storeTerms(Info info, TokenStream tokenStream, float boost, int positionIncrementGap, int offsetGap) {
if (boost <= 0.0f) {
throw new IllegalArgumentException("boost factor must be greater than 0.0");
}
int pos = -1;
int offset = 0;
if (info.numTokens == 0) {
@ -1598,7 +1613,7 @@ public class MemoryIndex {
@Override
public NumericDocValues getNormValues(String field) {
Info info = fields.get(field);
if (info == null) {
if (info == null || info.fieldInfo.omitsNorms()) {
return null;
}
return info.getNormDocValues();

View File

@ -45,6 +45,7 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
@ -199,6 +200,21 @@ public class TestMemoryIndex extends LuceneTestCase {
TestUtil.checkReader(reader);
}
@Test
public void testOmitNorms() throws IOException {
  MemoryIndex mi = new MemoryIndex();
  // Index a tokenized field whose type explicitly opts out of norms.
  FieldType fieldType = new FieldType();
  fieldType.setTokenized(true);
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  fieldType.setOmitNorms(true);
  mi.addField(new Field("f1", "some text in here", fieldType), analyzer);
  mi.freeze();
  // A field that omits norms must not expose norm values through the reader.
  LeafReader reader = (LeafReader) mi.createSearcher().getIndexReader();
  NumericDocValues norms = reader.getNormValues("f1");
  assertNull(norms);
}
@Test
public void testBuildFromDocument() {
@ -276,7 +292,7 @@ public class TestMemoryIndex extends LuceneTestCase {
try {
MemoryIndex.fromDocument(doc, analyzer);
} catch (IllegalArgumentException e) {
assertEquals("Can't add [BINARY] doc values field [field], because [NUMERIC] doc values field already exists", e.getMessage());
assertEquals("cannot change DocValues type from NUMERIC to BINARY for field \"field\"", e.getMessage());
}
doc = new Document();