LUCENE-8557: LeafReader.getFieldInfos should always return the same instance

MemoryIndex: compute/cache up-front
Solr Collapse/Expand with top_fc: compute/cache up-front
Json Facets numerics / hash DV: use the cached fieldInfos on SolrIndexSearcher
SolrIndexSearcher: move the cached FieldInfos to SlowCompositeReaderWrapper
This commit is contained in:
David Smiley 2018-11-06 14:45:32 -05:00
parent b230543b47
commit d0cd4245bd
15 changed files with 95 additions and 135 deletions
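In practical terms, the contract this commit establishes is plain reference equality: calling `LeafReader.getFieldInfos()` twice must hand back the same cached object. Below is a minimal, hypothetical sketch of that expectation against `MemoryIndex` (one of the readers touched by this commit); the class name, field name, and sample text are illustrative only, and the check mirrors the one added to `TestUtil.checkReader` further down in this diff.

```java
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.memory.MemoryIndex;

public class FieldInfosCachingCheck {
  public static void main(String[] args) {
    // Build a tiny single-field in-memory index (illustrative only).
    MemoryIndex mi = new MemoryIndex();
    mi.addField("title", "hello world", new StandardAnalyzer());

    // MemoryIndex's reader is a LeafReader; with this commit it pre-creates
    // its FieldInfos in the constructor instead of rebuilding them per call.
    LeafReader reader = (LeafReader) mi.createSearcher().getIndexReader();

    // The documented expectation: repeated calls return the same cached instance.
    if (reader.getFieldInfos() != reader.getFieldInfos()) {
      throw new AssertionError("getFieldInfos() should return the same instance");
    }
  }
}
```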

View File

@@ -252,6 +252,10 @@ Improvements:
first 4 are index dimensions defining the bounding box of the Triangle and the
remaining 3 data dimensions define the vertices of the triangle. (Nick Knize)
* LUCENE-8557: LeafReader.getFieldInfos is now documented and tested that it ought to return
the same cached instance. MemoryIndex's impl now pre-creates the FieldInfos instead of
re-calculating a new instance each time. (Tim Underwood, David Smiley)
Other:
* LUCENE-8523: Correct typo in JapaneseNumberFilterFactory javadocs (Ankush Jhalani)

View File

@@ -38,6 +38,10 @@ import org.apache.lucene.util.ArrayUtil;
* @lucene.experimental
*/
public class FieldInfos implements Iterable<FieldInfo> {
/** An instance without any fields. */
public final static FieldInfos EMPTY = new FieldInfos(new FieldInfo[0]);
private final boolean hasFreq;
private final boolean hasProx;
private final boolean hasPayloads;

View File

@@ -206,6 +206,10 @@ public abstract class LeafReader extends IndexReader {
/**
* Get the {@link FieldInfos} describing all fields in
* this reader.
*
* Note: Implementations should cache the FieldInfos
* instance returned by this method such that subsequent
* calls to this method return the same instance.
* @lucene.experimental
*/
public abstract FieldInfos getFieldInfos();
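
For illustration, here is a minimal sketch (not part of this patch) of how an implementation can satisfy the newly documented contract: compute the FieldInfos once and return that same instance thereafter. The wrapper class name is hypothetical; it simply mirrors the pattern MemoryIndex and SlowCompositeReaderWrapper adopt elsewhere in this commit.

```java
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;

/** Hypothetical wrapper showing the documented caching contract. */
class CachingFieldInfosReader extends FilterLeafReader {
  private final FieldInfos fieldInfos; // computed once in the constructor

  CachingFieldInfosReader(LeafReader in) {
    super(in);
    this.fieldInfos = in.getFieldInfos();
  }

  @Override
  public FieldInfos getFieldInfos() {
    return fieldInfos; // same instance on every call
  }

  @Override
  public CacheHelper getCoreCacheHelper() {
    return in.getCoreCacheHelper();
  }

  @Override
  public CacheHelper getReaderCacheHelper() {
    return in.getReaderCacheHelper();
  }
}
```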

View File

@@ -132,7 +132,7 @@ public class TestPendingDeletes extends LuceneTestCase {
SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 3, false, Codec.getDefault(),
Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1);
FieldInfos fieldInfos = new FieldInfos(new FieldInfo[0]);
FieldInfos fieldInfos = FieldInfos.EMPTY;
si.getCodec().fieldInfosFormat().write(dir, si, "", fieldInfos, IOContext.DEFAULT);
PendingDeletes deletes = newPendingDeletes(commitInfo);
for (int i = 0; i < 3; i++) {

View File

@@ -47,7 +47,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
private int[][] positions = new int[testTerms.length][];
private Directory dir;
private SegmentCommitInfo seg;
private FieldInfos fieldInfos = new FieldInfos(new FieldInfo[0]);
private FieldInfos fieldInfos = FieldInfos.EMPTY;
private static int TERM_FREQ = 3;
private static class TestToken implements Comparable<TestToken> {

View File

@@ -1142,12 +1142,20 @@ public class MemoryIndex {
private final class MemoryIndexReader extends LeafReader {
private final MemoryFields memoryFields = new MemoryFields(fields);
private final FieldInfos fieldInfos;
private MemoryIndexReader() {
super(); // avoid as much superclass baggage as possible
FieldInfo[] fieldInfosArr = new FieldInfo[fields.size()];
int i = 0;
for (Info info : fields.values()) {
info.prepareDocValuesAndPointValues();
fieldInfosArr[i++] = info.fieldInfo;
}
fieldInfos = new FieldInfos(fieldInfosArr);
}
private Info getInfoForExpectedDocValuesType(String fieldName, DocValuesType expectedType) {
@@ -1171,12 +1179,7 @@ public class MemoryIndex {
@Override
public FieldInfos getFieldInfos() {
FieldInfo[] fieldInfos = new FieldInfo[fields.size()];
int i = 0;
for (Info info : fields.values()) {
fieldInfos[i++] = info.fieldInfo;
}
return new FieldInfos(fieldInfos);
return fieldInfos;
}
@Override

View File

@@ -22,7 +22,6 @@ import java.util.Random;
import junit.framework.Assert;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
@@ -207,7 +206,7 @@ public class QueryUtils {
@Override
public FieldInfos getFieldInfos() {
return new FieldInfos(new FieldInfo[0]);
return FieldInfos.EMPTY;
}
final Bits liveDocs = new Bits.MatchNoBits(maxDoc);

View File

@@ -335,6 +335,11 @@ public final class TestUtil {
}
assert Accountables.toString(sr) != null;
}
// FieldInfos should be cached at the reader and always return the same instance
if (reader.getFieldInfos() != reader.getFieldInfos()) {
throw new RuntimeException("getFieldInfos() returned different instances for class: "+reader.getClass());
}
}
// used by TestUtil.checkReader to check some things really unrelated to the index,

View File

@@ -30,6 +30,7 @@ Jetty 9.4.11.v20180605
Upgrade Notes
----------------------
* SOLR-12767: The min_rf parameter is no longer needed, Solr will always return the achieved replication factor (rf)
in the response header.
@@ -187,6 +188,9 @@ Improvements
* SOLR-12699: Make contrib/ltr LTRScoringModel immutable and cache its hashCode.
(Stanislav Livotov, Edward Ribeiro, Christine Poerschke)
* LUCENE-8557: Some internal LeafReader.getFieldInfos implementations were being re-computed on-demand instead of
once up front leading to some slowdowns in places like JSON Facets and field collapsing. (Tim Underwood, David Smiley)
================== 7.5.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@@ -19,7 +19,6 @@ package org.apache.solr.handler.component;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@@ -33,10 +32,6 @@ import com.carrotsearch.hppc.cursors.LongCursor;
import com.carrotsearch.hppc.cursors.LongObjectCursor;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
@@ -84,7 +79,6 @@ import org.apache.solr.search.DocSlice;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpecParsing;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.apache.solr.util.plugin.SolrCoreAware;
@@ -214,9 +208,8 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
if(fieldType instanceof StrField) {
//Get The Top Level SortedDocValues
if(CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
@SuppressWarnings("resource") LeafReader uninvertingReader = UninvertingReader.wrap(
new ReaderWrapper(searcher.getSlowAtomicReader(), field),
Collections.singletonMap(field, UninvertingReader.Type.SORTED)::get);
@SuppressWarnings("resource")
LeafReader uninvertingReader = CollapsingQParserPlugin.getTopFieldCacheReader(searcher, field);
values = uninvertingReader.getSortedDocValues(field);
} else {
values = DocValues.getSorted(reader, field);
@@ -384,9 +377,8 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
if(values != null) {
//Get The Top Level SortedDocValues again so we can re-iterate:
if(CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
@SuppressWarnings("resource") LeafReader uninvertingReader = UninvertingReader.wrap(
new ReaderWrapper(searcher.getSlowAtomicReader(), field),
Collections.singletonMap(field, UninvertingReader.Type.SORTED)::get);
@SuppressWarnings("resource")
LeafReader uninvertingReader = CollapsingQParserPlugin.getTopFieldCacheReader(searcher, field);
values = uninvertingReader.getSortedDocValues(field);
} else {
values = DocValues.getSorted(reader, field);
@@ -764,65 +756,4 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
return Category.QUERY;
}
// this reader alters the content of the given reader so it should not
// delegate the caching stuff
private static class ReaderWrapper extends FilterLeafReader {
private String field;
public ReaderWrapper(LeafReader leafReader, String field) {
super(leafReader);
this.field = field;
}
public SortedDocValues getSortedDocValues(String field) {
return null;
}
public FieldInfos getFieldInfos() {
Iterator<FieldInfo> it = in.getFieldInfos().iterator();
List<FieldInfo> newInfos = new ArrayList<>();
while(it.hasNext()) {
FieldInfo fieldInfo = it.next();
if(fieldInfo.name.equals(field)) {
FieldInfo f = new FieldInfo(fieldInfo.name,
fieldInfo.number,
fieldInfo.hasVectors(),
fieldInfo.hasNorms(),
fieldInfo.hasPayloads(),
fieldInfo.getIndexOptions(),
DocValuesType.NONE,
fieldInfo.getDocValuesGen(),
fieldInfo.attributes(),
fieldInfo.getPointDataDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
fieldInfo.isSoftDeletesField());
newInfos.add(f);
} else {
newInfos.add(fieldInfo);
}
}
FieldInfos infos = new FieldInfos(newInfos.toArray(new FieldInfo[newInfos.size()]));
return infos;
}
// NOTE: delegating the caches is wrong here as we are altering the content
// of the reader, this should ONLY be used under an uninvertingreader which
// will restore doc values back using uninversion, otherwise all sorts of
// crazy things could happen.
@Override
public CacheHelper getCoreCacheHelper() {
return in.getCoreCacheHelper();
}
@Override
public CacheHelper getReaderCacheHelper() {
return in.getReaderCacheHelper();
}
}
}

View File

@@ -48,6 +48,11 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
private final CompositeReader in;
private final LeafMetaData metaData;
// Cached copy of FieldInfos to prevent it from being re-created on each
// getFieldInfos call. Most (if not all) other LeafReader implementations
// also have a cached FieldInfos instance so this is consistent. SOLR-12878
private final FieldInfos fieldInfos;
final Map<String,Terms> cachedTerms = new ConcurrentHashMap<>();
// TODO: consider ConcurrentHashMap ?
@@ -86,6 +91,7 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
}
metaData = new LeafMetaData(reader.leaves().get(0).reader().getMetaData().getCreatedVersionMajor(), minVersion, null);
}
fieldInfos = FieldInfos.getMergedFieldInfos(in);
}
@Override
@@ -273,8 +279,7 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
@Override
public FieldInfos getFieldInfos() {
ensureOpen();
return FieldInfos.getMergedFieldInfos(in); // TODO cache?
return fieldInfos;
}
@Override

View File

@@ -387,13 +387,55 @@ public class CollapsingQParserPlugin extends QParserPlugin {
}
/**
* This forces the use of the top level field cache for String fields.
* This is VERY fast at query time but slower to warm and causes insanity.
*/
public static LeafReader getTopFieldCacheReader(SolrIndexSearcher searcher, String collapseField) {
return UninvertingReader.wrap(
new ReaderWrapper(searcher.getSlowAtomicReader(), collapseField),
Collections.singletonMap(collapseField, UninvertingReader.Type.SORTED)::get);
}
private static class ReaderWrapper extends FilterLeafReader {
private String field;
private final FieldInfos fieldInfos;
public ReaderWrapper(LeafReader leafReader, String field) {
ReaderWrapper(LeafReader leafReader, String field) {
super(leafReader);
this.field = field;
// TODO can we just do "field" and not bother with the other fields?
List<FieldInfo> newInfos = new ArrayList<>(in.getFieldInfos().size());
for (FieldInfo fieldInfo : in.getFieldInfos()) {
if (fieldInfo.name.equals(field)) {
FieldInfo f = new FieldInfo(fieldInfo.name,
fieldInfo.number,
fieldInfo.hasVectors(),
fieldInfo.hasNorms(),
fieldInfo.hasPayloads(),
fieldInfo.getIndexOptions(),
DocValuesType.NONE,
fieldInfo.getDocValuesGen(),
fieldInfo.attributes(),
fieldInfo.getPointDataDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
fieldInfo.isSoftDeletesField());
newInfos.add(f);
} else {
newInfos.add(fieldInfo);
}
}
FieldInfos infos = new FieldInfos(newInfos.toArray(new FieldInfo[newInfos.size()]));
this.fieldInfos = infos;
}
public FieldInfos getFieldInfos() {
return fieldInfos;
}
public SortedDocValues getSortedDocValues(String field) {
return null;
}
// NOTE: delegating the caches is wrong here as we are altering the content
@@ -410,37 +452,6 @@ public class CollapsingQParserPlugin extends QParserPlugin {
public CacheHelper getReaderCacheHelper() {
return in.getReaderCacheHelper();
}
public SortedDocValues getSortedDocValues(String field) {
return null;
}
public FieldInfos getFieldInfos() {
Iterator<FieldInfo> it = in.getFieldInfos().iterator();
List<FieldInfo> newInfos = new ArrayList();
while(it.hasNext()) {
FieldInfo fieldInfo = it.next();
if(fieldInfo.name.equals(field)) {
FieldInfo f = new FieldInfo(fieldInfo.name,
fieldInfo.number,
fieldInfo.hasVectors(),
fieldInfo.hasNorms(),
fieldInfo.hasPayloads(),
fieldInfo.getIndexOptions(),
DocValuesType.NONE,
fieldInfo.getDocValuesGen(),
fieldInfo.attributes(),
0, 0, 0, fieldInfo.isSoftDeletesField());
newInfos.add(f);
} else {
newInfos.add(fieldInfo);
}
}
FieldInfos infos = new FieldInfos(newInfos.toArray(new FieldInfo[newInfos.size()]));
return infos;
}
}
@@ -1255,14 +1266,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
if(collapseFieldType instanceof StrField) {
if(HINT_TOP_FC.equals(hint)) {
/*
* This hint forces the use of the top level field cache for String fields.
* This is VERY fast at query time but slower to warm and causes insanity.
*/
@SuppressWarnings("resource") final LeafReader uninvertingReader = UninvertingReader.wrap(
new ReaderWrapper(searcher.getSlowAtomicReader(), collapseField),
Collections.singletonMap(collapseField, UninvertingReader.Type.SORTED)::get);
@SuppressWarnings("resource")
final LeafReader uninvertingReader = getTopFieldCacheReader(searcher, collapseField);
docValuesProducer = new EmptyDocValuesProducer() {
@Override

View File

@@ -126,8 +126,6 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
// list of all caches associated with this searcher.
private final SolrCache[] cacheList;
private final FieldInfos fieldInfos;
private DirectoryFactory directoryFactory;
private final LeafReader leafReader;
@@ -263,7 +261,6 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
this.queryResultMaxDocsCached = solrConfig.queryResultMaxDocsCached;
this.useFilterForSortedQuery = solrConfig.useFilterForSortedQuery;
this.fieldInfos = leafReader.getFieldInfos();
this.docFetcher = new SolrDocumentFetcher(this, solrConfig, enableCache);
this.cachingEnabled = enableCache;
@@ -319,7 +316,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
}
public FieldInfos getFieldInfos() {
return fieldInfos;
return leafReader.getFieldInfos();
}
/*
@@ -494,7 +491,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
* Returns a collection of all field names the index reader knows about.
*/
public Iterable<String> getFieldNames() {
return Iterables.transform(fieldInfos, fieldInfo -> fieldInfo.name);
return Iterables.transform(getFieldInfos(), fieldInfo -> fieldInfo.name);
}
public SolrCache<Query,DocSet> getFilterCache() {

View File

@@ -199,7 +199,7 @@ class FacetFieldProcessorByHashDV extends FacetFieldProcessor {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
getClass()+" doesn't support prefix"); // yet, but it could
}
FieldInfo fieldInfo = fcontext.searcher.getSlowAtomicReader().getFieldInfos().fieldInfo(sf.getName());
FieldInfo fieldInfo = fcontext.searcher.getFieldInfos().fieldInfo(sf.getName());
if (fieldInfo != null &&
fieldInfo.getDocValuesType() != DocValuesType.NUMERIC &&
fieldInfo.getDocValuesType() != DocValuesType.SORTED &&

View File

@@ -22,7 +22,6 @@ import java.util.List;
import java.util.Random;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
@@ -391,7 +390,7 @@ public class TestDocSet extends LuceneTestCase {
@Override
public FieldInfos getFieldInfos() {
return new FieldInfos(new FieldInfo[0]);
return FieldInfos.EMPTY;
}
@Override