SOLR-12782: UninvertingReader avoids FieldInfo creation.

Wrapping is now a bit more lightweight: Does not create FieldInfo for fields that can't be uninverted (saves mem) and can avoid wrapping the reader altogether if there's nothing to uninvert. IndexSchema.getUninversionMap refactored to getUninversionMapper and no longer merges FieldInfos.
2025-02-09 11:35:14 +00:00 · 2018-10-05 20:40:39 -07:00 · 2018-10-05 20:40:39 -07:00 · e2b8beccb0
commit e2b8beccb0
parent eb47099ee2
10 changed files with 120 additions and 123 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -169,9 +169,14 @@ Bug Fixes

 Improvements
 ----------------------
+
 * SOLR-12767: Solr now always includes in the response of update requests the achieved replication factor
   (Tomás Fernández Löbbe)

+* SOLR-12782: UninvertingReader wrapping is now a bit more lightweight: Does not create FieldInfo for fields that
+  can't be uninverted (saves mem) and can avoid wrapping the reader altogether if there's nothing to uninvert.
+  IndexSchema.getUninversionMap refactored to getUninversionMapper and no longer merges FieldInfos. (David Smiley)
+
 ==================  7.5.0 ==================

 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
--- a/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java
@ -19,10 +19,9 @@ package org.apache.solr.handler.component;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.HashMap;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
-import java.util.Map;

 import com.carrotsearch.hppc.IntHashSet;
 import com.carrotsearch.hppc.IntObjectHashMap;
@ -215,10 +214,9 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
    if(fieldType instanceof StrField) {
      //Get The Top Level SortedDocValues
      if(CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
-        Map<String, UninvertingReader.Type> mapping = new HashMap();
-        mapping.put(field, UninvertingReader.Type.SORTED);
-        @SuppressWarnings("resource")
-        UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
+        @SuppressWarnings("resource") LeafReader uninvertingReader = UninvertingReader.wrap(
+            new ReaderWrapper(searcher.getSlowAtomicReader(), field),
+            Collections.singletonMap(field, UninvertingReader.Type.SORTED)::get);
        values = uninvertingReader.getSortedDocValues(field);
      } else {
        values = DocValues.getSorted(reader, field);
@ -386,10 +384,9 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
    if(values != null) {
      //Get The Top Level SortedDocValues again so we can re-iterate:
      if(CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
-        Map<String, UninvertingReader.Type> mapping = new HashMap();
-        mapping.put(field, UninvertingReader.Type.SORTED);
-        @SuppressWarnings("resource")
-        UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
+        @SuppressWarnings("resource") LeafReader uninvertingReader = UninvertingReader.wrap(
+            new ReaderWrapper(searcher.getSlowAtomicReader(), field),
+            Collections.singletonMap(field, UninvertingReader.Type.SORTED)::get);
        values = uninvertingReader.getSortedDocValues(field);
      } else {
        values = DocValues.getSorted(reader, field);
--- a/solr/core/src/java/org/apache/solr/index/UninvertDocValuesMergePolicyFactory.java
+++ b/solr/core/src/java/org/apache/solr/index/UninvertDocValuesMergePolicyFactory.java
@ -29,6 +29,7 @@ import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.FilterCodecReader;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.OneMergeWrappingMergePolicy;
@ -134,13 +135,13 @@ public class UninvertDocValuesMergePolicyFactory extends WrapperMergePolicyFacto
   */
  private class UninvertingFilterCodecReader extends FilterCodecReader {

-    private final UninvertingReader uninvertingReader;
+    private final LeafReader uninvertingReader;
    private final DocValuesProducer docValuesProducer;

    public UninvertingFilterCodecReader(CodecReader in, Map<String,UninvertingReader.Type> uninversionMap) {
      super(in);

-      this.uninvertingReader = new UninvertingReader(in, uninversionMap);
+      this.uninvertingReader = UninvertingReader.wrap(in, uninversionMap::get);
      this.docValuesProducer = new DocValuesProducer() {

        @Override
--- a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
+++ b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
@ -44,11 +44,7 @@ import java.util.stream.Stream;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
-import org.apache.lucene.index.DocValuesType;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.queries.payloads.PayloadDecoder;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.Version;
@ -355,22 +351,15 @@ public class IndexSchema {
    queryAnalyzer = new SolrQueryAnalyzer();
  }

-  public Map<String,UninvertingReader.Type> getUninversionMap(IndexReader reader) {
-    final Map<String,UninvertingReader.Type> map = new HashMap<>();
-    for (FieldInfo f : MultiFields.getMergedFieldInfos(reader)) {
-      if (f.getDocValuesType() == DocValuesType.NONE) {
-        // we have a field (of some kind) in the reader w/o DocValues
-        // if we have an equivalent indexed=true field in the schema, trust it's uninversion type (if any)
-        final SchemaField sf = getFieldOrNull(f.name);
-        if (sf != null && sf.indexed()) {
-          final UninvertingReader.Type type = sf.getType().getUninversionType(sf);
-          if (type != null) {
-            map.put(f.name, type);
+  /** @see UninvertingReader */
+  public Function<String, UninvertingReader.Type> getUninversionMapper() {
+    return name -> {
+      SchemaField sf = getFieldOrNull(name);
+      if (sf == null) {
+        return null;
      }
-        }
-      }
-    }
-    return map;
+      return sf.getType().getUninversionType(sf);
+    };
  }

  /**
--- a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java
@ -19,8 +19,8 @@ package org.apache.solr.search;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.EnumSet;
-import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@ -1260,11 +1260,9 @@ public class CollapsingQParserPlugin extends QParserPlugin {
          * This hint forces the use of the top level field cache for String fields.
          * This is VERY fast at query time but slower to warm and causes insanity.
          */
-
-          Map<String, UninvertingReader.Type> mapping = new HashMap();
-          mapping.put(collapseField, UninvertingReader.Type.SORTED);
-          @SuppressWarnings("resource") final UninvertingReader uninvertingReader =
-              new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), collapseField), mapping);
+          @SuppressWarnings("resource") final LeafReader uninvertingReader = UninvertingReader.wrap(
+                  new ReaderWrapper(searcher.getSlowAtomicReader(), collapseField),
+                  Collections.singletonMap(collapseField, UninvertingReader.Type.SORTED)::get);

          docValuesProducer = new EmptyDocValuesProducer() {
              @Override
--- a/solr/core/src/java/org/apache/solr/search/Insanity.java
+++ b/solr/core/src/java/org/apache/solr/search/Insanity.java
@ -49,8 +49,9 @@ public class Insanity {
   * instead of a numeric.
   */
  public static LeafReader wrapInsanity(LeafReader sane, String insaneField) {
-    return new UninvertingReader(new InsaneReader(sane, insaneField),
-                                 Collections.singletonMap(insaneField, UninvertingReader.Type.SORTED));
+    return UninvertingReader.wrap(
+        new InsaneReader(sane, insaneField),
+        Collections.singletonMap(insaneField, UninvertingReader.Type.SORTED)::get);
  }
  
  /** Hides the proper numeric dv type for the field */
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@ -157,25 +157,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
  private static DirectoryReader wrapReader(SolrCore core, DirectoryReader reader) throws IOException {
    assert reader != null;
    return ExitableDirectoryReader.wrap(
-        wrapUninvertingReaderPerSegment(core, reader),
+        UninvertingReader.wrap(reader, core.getLatestSchema().getUninversionMapper()),
        SolrQueryTimeoutImpl.getInstance());
  }

-  /**
-   * If docvalues are enabled or disabled after data has already been indexed for a field, such that
-   * only some segments have docvalues, uninverting on the top level reader will cause 
-   * IllegalStateException to be thrown when trying to use a field with such mixed data. This is because
-   * the {@link IndexSchema#getUninversionMap(IndexReader)} method decides to put a field 
-   * into the uninverteding map only if *NO* segment in the index contains docvalues for that field.
-   * 
-   * Therefore, this class provides a uninverting map per segment such that for any field, 
-   * DocValues are used from segments if they exist and uninversion of the field is performed on the rest
-   * of the segments.
-   */
-   private static DirectoryReader wrapUninvertingReaderPerSegment(SolrCore core, DirectoryReader reader) throws IOException {
-     return UninvertingReader.wrap(reader, r -> core.getLatestSchema().getUninversionMap(r));
-   }
-
  /**
   * Builds the necessary collector chain (via delegate wrapping) and executes the query against it. This method takes
   * into consideration both the explicitly provided collector and postFilter as well as any needed collector wrappers
--- a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
@ -21,11 +21,11 @@ import java.util.ArrayList;
 import java.util.Map;
 import java.util.function.Function;

-import org.apache.lucene.document.BinaryDocValuesField; // javadocs
-import org.apache.lucene.document.NumericDocValuesField; // javadocs
-import org.apache.lucene.document.SortedDocValuesField; // javadocs
-import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
-import org.apache.lucene.document.StringField; // javadocs
+import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocValuesType;
@ -174,32 +174,32 @@ public class UninvertingReader extends FilterLeafReader {

  }

+  /** @see #wrap(DirectoryReader, Function) */
+  public static DirectoryReader wrap(DirectoryReader reader, Map<String, Type> mapping) throws IOException {
+    return wrap(reader, mapping::get);
+  }
+
  /**
-   * 
-   * Wraps a provided DirectoryReader. Note that for convenience, the returned reader
+   * Wraps a provided {@link DirectoryReader}. Note that for convenience, the returned reader
   * can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)})
   * and so on. 
   * 
   * @param in input directory reader
-   * @param perSegmentMapper function to map a segment reader to a mapping of fields to their uninversion type
+   * @param mapper function to map a field name to an uninversion type.  A Null result means to not uninvert.
   * @return a wrapped directory reader
   */
-  public static DirectoryReader wrap(DirectoryReader in, final Function<LeafReader, Map<String,Type>> perSegmentMapper) throws IOException {
-    return new UninvertingDirectoryReader(in, perSegmentMapper);
-  }
-  
-  public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
-    return UninvertingReader.wrap(in, (r) -> mapping);
+  public static DirectoryReader wrap(DirectoryReader in, Function<String, Type> mapper) throws IOException {
+    return new UninvertingDirectoryReader(in, mapper);
  }

  static class UninvertingDirectoryReader extends FilterDirectoryReader {
-    final Function<LeafReader, Map<String,Type>> mapper;
+    final Function<String, Type> mapper;
    
-    public UninvertingDirectoryReader(DirectoryReader in, final Function<LeafReader, Map<String,Type>> mapper) throws IOException {
+    public UninvertingDirectoryReader(DirectoryReader in, final Function<String, Type> mapper) throws IOException {
      super(in, new FilterDirectoryReader.SubReaderWrapper() {
        @Override
        public LeafReader wrap(LeafReader reader) {
-          return new UninvertingReader(reader, mapper.apply(reader));
+          return UninvertingReader.wrap(reader, mapper);
        }
      });
      this.mapper = mapper;
@ -220,25 +220,25 @@ public class UninvertingReader extends FilterLeafReader {
    }
  }

-  final Map<String,Type> mapping;
-  final FieldInfos fieldInfos;
-  
  /**
-   * Create a new UninvertingReader with the specified mapping 
+   * Create a new UninvertingReader with the specified mapping, wrapped around the input.  It may be deemed that there
+   * is no mapping to do, in which case the input is returned.
   * <p>
-   * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Function)}
-   * instead.
+   * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Function)} instead.
   *
   * @lucene.internal
   */
-  public UninvertingReader(LeafReader in, Map<String,Type> mapping) {
-    super(in);
-    this.mapping = mapping;
-    ArrayList<FieldInfo> filteredInfos = new ArrayList<>();
+  public static LeafReader wrap(LeafReader in, Function<String, Type> mapping) {
+    boolean wrap = false;
+
+    // Calculate a new FieldInfos that has DocValuesType where we didn't before
+    ArrayList<FieldInfo> newFieldInfos = new ArrayList<>(in.getFieldInfos().size());
    for (FieldInfo fi : in.getFieldInfos()) {
      DocValuesType type = fi.getDocValuesType();
-      if (type == DocValuesType.NONE) {        
-        Type t = mapping.get(fi.name);
+      // fields which currently don't have docValues, but are uninvertable (indexed or points data present)
+      if (type == DocValuesType.NONE &&
+          (fi.getIndexOptions() != IndexOptions.NONE || (fi.getPointNumBytes() > 0 && fi.getPointDimensionCount() == 1))) {
+        Type t = mapping.apply(fi.name); // could definitely return null, thus still can't uninvert it
        if (t != null) {
          if (t == Type.INTEGER_POINT || t == Type.LONG_POINT || t == Type.FLOAT_POINT || t == Type.DOUBLE_POINT) {
            // type uses points
@ -280,11 +280,30 @@ public class UninvertingReader extends FilterLeafReader {
          }
        }
      }
-      filteredInfos.add(new FieldInfo(fi.name, fi.number, fi.hasVectors(), fi.omitsNorms(),
+      if (type != fi.getDocValuesType()) { // we changed it
+        wrap = true;
+        newFieldInfos.add(new FieldInfo(fi.name, fi.number, fi.hasVectors(), fi.omitsNorms(),
            fi.hasPayloads(), fi.getIndexOptions(), type, fi.getDocValuesGen(), fi.attributes(),
            fi.getPointDimensionCount(), fi.getPointNumBytes(), fi.isSoftDeletesField()));
+      } else {
+        newFieldInfos.add(fi);
      }
-    fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()]));
+    }
+    if (!wrap) {
+      return in;
+    } else {
+      FieldInfos fieldInfos = new FieldInfos(newFieldInfos.toArray(new FieldInfo[newFieldInfos.size()]));
+      return new UninvertingReader(in, mapping, fieldInfos);
+    }
+  }
+
+  final Function<String, Type> mapping;
+  final FieldInfos fieldInfos;
+
+  private UninvertingReader(LeafReader in, Function<String, Type> mapping, FieldInfos fieldInfos) {
+    super(in);
+    this.mapping = mapping;
+    this.fieldInfos = fieldInfos;
  }

  @Override
@ -388,11 +407,7 @@ public class UninvertingReader extends FilterLeafReader {
   * if the field doesn't exist or doesn't have a mapping.
   */
  private Type getType(String field) {
-    FieldInfo info = fieldInfos.fieldInfo(field);
-    if (info == null || info.getDocValuesType() == DocValuesType.NONE) {
-      return null;
-    }
-    return mapping.get(field);
+    return mapping.apply(field);
  }

  // NOTE: delegating the cache helpers is wrong since this wrapper alters the
--- a/solr/core/src/java/org/apache/solr/update/DeleteByQueryWrapper.java
+++ b/solr/core/src/java/org/apache/solr/update/DeleteByQueryWrapper.java
@ -50,7 +50,7 @@ final class DeleteByQueryWrapper extends Query {
  }
  
  LeafReader wrap(LeafReader reader) {
-    return new UninvertingReader(reader, schema.getUninversionMap(reader));
+    return UninvertingReader.wrap(reader, schema.getUninversionMapper());
  }
  
  // we try to be well-behaved, but we are not (and IW's applyQueryDeletes isn't much better...)
--- a/solr/core/src/test/org/apache/solr/uninverting/TestUninvertingReader.java
+++ b/solr/core/src/test/org/apache/solr/uninverting/TestUninvertingReader.java
@ -36,6 +36,7 @@ import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
@ -367,24 +368,29 @@ public class TestUninvertingReader extends LuceneTestCase {
    uninvertingMap.put("dv", Type.LEGACY_INTEGER);
    uninvertingMap.put("dint", Type.INTEGER_POINT);

-    DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), 
-                         uninvertingMap);
+    DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), uninvertingMap);
    LeafReader leafReader = ir.leaves().get(0).reader();
+    FieldInfos fieldInfos = leafReader.getFieldInfos();
+    LeafReader originalLeafReader = ((UninvertingReader)leafReader).getDelegate();

-    FieldInfo intFInfo = leafReader.getFieldInfos().fieldInfo("int");
+    assertNotSame(originalLeafReader.getFieldInfos(), fieldInfos);
+    assertSame("do not rebuild FieldInfo for unaffected fields",
+        originalLeafReader.getFieldInfos().fieldInfo("id"), fieldInfos.fieldInfo("id"));
+
+    FieldInfo intFInfo = fieldInfos.fieldInfo("int");
    assertEquals(DocValuesType.NUMERIC, intFInfo.getDocValuesType());
    assertEquals(0, intFInfo.getPointDimensionCount());
    assertEquals(0, intFInfo.getPointNumBytes());

-    FieldInfo dintFInfo = leafReader.getFieldInfos().fieldInfo("dint");
+    FieldInfo dintFInfo = fieldInfos.fieldInfo("dint");
    assertEquals(DocValuesType.NUMERIC, dintFInfo.getDocValuesType());
    assertEquals(1, dintFInfo.getPointDimensionCount());
    assertEquals(4, dintFInfo.getPointNumBytes());

-    FieldInfo dvFInfo = leafReader.getFieldInfos().fieldInfo("dv");
+    FieldInfo dvFInfo = fieldInfos.fieldInfo("dv");
    assertEquals(DocValuesType.NUMERIC, dvFInfo.getDocValuesType());

-    FieldInfo storedFInfo = leafReader.getFieldInfos().fieldInfo("stored");
+    FieldInfo storedFInfo = fieldInfos.fieldInfo("stored");
    assertEquals(DocValuesType.NONE, storedFInfo.getDocValuesType());

    TestUtil.checkReader(ir);