SOLR-8344: Decide default when requested fields are both column and row stored.

2017-09-19 06:02:14 +07:00 · 2017-09-19 06:02:14 +07:00 · 40f78dd274
parent 2e5f9a4369
commit 40f78dd274
5 changed files with 230 additions and 72 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -145,6 +145,8 @@ Optimizations
 * SOLR-11314: FastCharStream: re-use the READ_PAST_EOF exception. (Michael Braun via David Smiley)
 * SOLR-8344: Decide default when requested fields are both column and row stored. (Cao Manh Dat, David Smiley)
 Other Changes
 ----------------------
--- a/solr/core/src/java/org/apache/solr/response/DocsStreamer.java
+++ b/solr/core/src/java/org/apache/solr/response/DocsStreamer.java
@ -49,9 +49,7 @@ import org.apache.solr.schema.TrieIntField;
 import org.apache.solr.schema.TrieLongField;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocList;
 import org.apache.solr.search.ReturnFields;
 import org.apache.solr.search.SolrDocumentFetcher;
 import org.apache.solr.search.SolrReturnFields;
 /**
 * This streams SolrDocuments from a DocList and applies transformer
@ -66,9 +64,7 @@ public class DocsStreamer implements Iterator<SolrDocument> {
  private final DocTransformer transformer;
  private final DocIterator docIterator;
-  private final Set<String> fnames; // returnFields.getLuceneFieldNames(). Maybe null. Not empty.
+  private final RetrieveFieldsOptimizer retrieveFieldsOptimizer;
  private final boolean onlyPseudoFields;
  private final Set<String> dvFieldsToReturn; // maybe null. Not empty.
  private int idx = -1;
@ -77,63 +73,13 @@ public class DocsStreamer implements Iterator<SolrDocument> {
    this.docs = rctx.getDocList();
    transformer = rctx.getReturnFields().getTransformer();
    docIterator = this.docs.iterator();
    fnames = rctx.getReturnFields().getLuceneFieldNames();
    //TODO move onlyPseudoFields calc to ReturnFields
    onlyPseudoFields = (fnames == null && !rctx.getReturnFields().wantsAllFields() && !rctx.getReturnFields().hasPatternMatching())
        || (fnames != null && fnames.size() == 1 && SolrReturnFields.SCORE.equals(fnames.iterator().next()));
    // add non-stored DV fields that may have been requested
    docFetcher = rctx.getSearcher().getDocFetcher();
    dvFieldsToReturn = calcDocValueFieldsForReturn(docFetcher, rctx.getReturnFields());
    retrieveFieldsOptimizer = RetrieveFieldsOptimizer.create(docFetcher, rctx.getReturnFields());
    retrieveFieldsOptimizer.optimize(docFetcher);
    if (transformer != null) transformer.setContext(rctx);
  }
  // TODO move to ReturnFields ?  Or SolrDocumentFetcher ?
  /**
   * Works out which non-stored docValues fields have to be added to each returned document.
   *
   * @param docFetcher   supplies the sets of non-stored docValues field names
   * @param returnFields describes the fields requested for the response
   * @return the docValues field names to return, or {@code null} when there are none;
   *         the returned set is never empty
   */
  public static Set<String> calcDocValueFieldsForReturn(SolrDocumentFetcher docFetcher, ReturnFields returnFields) {
    final Set<String> selected;
    if (returnFields.wantsAllFields()) {
      // everything was requested; look for fields that were additionally named explicitly
      final Set<String> explicitlyNamed = returnFields.getLuceneFieldNames(true);
      if (explicitlyNamed == null) {
        selected = docFetcher.getNonStoredDVs(true);
      } else {
        selected = new HashSet<>(docFetcher.getNonStoredDVs(true)); // copy, the source set is not ours to modify
        // an explicitly named field is returned even if it may be useDocValuesAsStored=false
        for (String name : explicitlyNamed) {
          if (docFetcher.getNonStoredDVs(false).contains(name)) {
            selected.add(name);
          }
        }
      }
    } else if (returnFields.hasPatternMatching()) {
      // globs were used: keep every candidate docValues field that the request matches
      final Set<String> matched = new HashSet<>();
      for (String candidate : docFetcher.getNonStoredDVs(true)) {
        if (returnFields.wantsField(candidate)) {
          matched.add(candidate);
        }
      }
      selected = matched;
    } else {
      final Set<String> requested = returnFields.getLuceneFieldNames();
      if (requested == null) {
        return null;
      }
      selected = new HashSet<>(requested); // copy
      // All non-stored docValues fields are considered here: even when the schema sets
      // useDocValuesAsStored=false, the field may have been requested explicitly through
      // the fl parameter.
      selected.retainAll(docFetcher.getNonStoredDVs(false));
    }
    // callers expect null rather than an empty set
    return (selected == null || selected.isEmpty()) ? null : selected;
  }
  /**
   * Returns the current value of this streamer's position counter.
   * The counter is initialised to -1 when the streamer is created.
   *
   * @return the current index
   */
  public int currentIndex() {
    return this.idx;
  }
@ -147,21 +93,22 @@ public class DocsStreamer implements Iterator<SolrDocument> {
    idx++;
    SolrDocument sdoc = null;
-    if (onlyPseudoFields) {
+    try {
-      // no need to get stored fields of the document, see SOLR-5968
+      if (retrieveFieldsOptimizer.returnStoredFields()) {
-      sdoc = new SolrDocument();
+        Document doc = docFetcher.doc(id, retrieveFieldsOptimizer.getStoredFields());
-    } else {
+        // make sure to use the schema from the searcher and not the request (cross-core)
-      try {
+        sdoc = convertLuceneDocToSolrDoc(doc, rctx.getSearcher().getSchema());
-        Document doc = docFetcher.doc(id, fnames);
+      } else {
-        sdoc = convertLuceneDocToSolrDoc(doc, rctx.getSearcher().getSchema()); // make sure to use the schema from the searcher and not the request (cross-core)
+        // no need to get stored fields of the document, see SOLR-5968
-
+        sdoc = new SolrDocument();
        // decorate the document with non-stored docValues fields
        if (dvFieldsToReturn != null) {
          docFetcher.decorateDocValueFields(sdoc, id, dvFieldsToReturn);
        }
      } catch (IOException e) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error reading document with docId " + id, e);
      }
      // decorate the document with non-stored docValues fields
      if (retrieveFieldsOptimizer.returnDVFields()) {
        docFetcher.decorateDocValueFields(sdoc, id, retrieveFieldsOptimizer.getDvFields());
      }
    } catch (IOException e) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error reading document with docId " + id, e);
    }
    if (transformer != null) {
--- a/solr/core/src/java/org/apache/solr/response/RetrieveFieldsOptimizer.java
+++ b/solr/core/src/java/org/apache/solr/response/RetrieveFieldsOptimizer.java
@ -0,0 +1,130 @@
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.solr.response;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.solr.search.ReturnFields;
 import org.apache.solr.search.SolrDocumentFetcher;
 import org.apache.solr.search.SolrReturnFields;
 /**
  * Holds, for one set of requested return fields, the field names to read from the stored
  * document and the field names to read from docValues, and can shift fields from the
  * former group to the latter when that avoids reading the stored document at all.
  */
 public class RetrieveFieldsOptimizer {
  // Field names to load from the stored document; null means "all available stored fields".
  private final Set<String> storedFields;
  // Field names to load from docValues; never null.
  private final Set<String> dvFields;

  /**
   * @param storedFields stored field names to fetch, or {@code null} for all stored fields;
   *                     the set is modified by {@link #optimize(Set)}
   * @param dvFields     docValues field names to fetch, must not be {@code null};
   *                     the set is modified by {@link #optimize(Set)}
   */
  RetrieveFieldsOptimizer(Set<String> storedFields, Set<String> dvFields) {
    this.storedFields = storedFields;
    this.dvFields = dvFields;
  }

  /**
   * Sometimes a field value can be fetched from either the stored document or docValues;
   * such fields have both and are single-valued.
   * If reading them from docValues lets us skip the stored document entirely for all
   * fields to be returned, we do so; otherwise the stored value is preferred whenever
   * there is a choice.
   *
   * @param docFetcher supplies the names of all single-valued docValues fields
   */
  void optimize(SolrDocumentFetcher docFetcher) {
    optimize(docFetcher.getAllSingleDV());
  }

  /**
   * Moves every stored field over to the docValues group, but only when all of them are
   * contained in {@code singleDVs}. Does nothing when all stored fields were requested.
   *
   * @param singleDVs names of the fields that can be read from single-valued docValues
   */
  void optimize(Set<String> singleDVs) {
    final boolean allStoredRequested = (storedFields == null);
    if (!allStoredRequested && singleDVs.containsAll(storedFields)) {
      dvFields.addAll(storedFields);
      storedFields.clear();
    }
  }

  /** @return {@code false} only when the stored field set is non-null and empty */
  boolean returnStoredFields() {
    return storedFields == null || !storedFields.isEmpty();
  }

  /** @return {@code true} when at least one field is to be read from docValues */
  boolean returnDVFields() {
    final boolean nothingFromDocValues = dvFields.isEmpty();
    return !nothingFromDocValues;
  }

  /** @return the stored field names to fetch, or {@code null} for all stored fields */
  Set<String> getStoredFields() {
    return storedFields;
  }

  /** @return the docValues field names to fetch, never {@code null} */
  Set<String> getDvFields() {
    return dvFields;
  }

  /**
   * Builds an optimizer from the requested return fields. The result is not yet optimized.
   *
   * @param docFetcher   supplies the stored and docValues field name sets
   * @param returnFields describes the fields requested for the response
   * @return a new optimizer holding the computed stored and docValues field sets
   */
  public static RetrieveFieldsOptimizer create(SolrDocumentFetcher docFetcher, ReturnFields returnFields) {
    final Set<String> fromStored = calcStoredFieldsForReturn(docFetcher, returnFields);
    final Set<String> fromDocValues = calcDocValueFieldsForReturn(docFetcher, returnFields);
    return new RetrieveFieldsOptimizer(fromStored, fromDocValues);
  }

  /**
   * Computes the stored field names to fetch.
   *
   * @return {@code null} when all fields are wanted, otherwise a mutable (possibly empty)
   *         set that never contains the score pseudo-field
   */
  private static Set<String> calcStoredFieldsForReturn(SolrDocumentFetcher docFetcher, ReturnFields returnFields) {
    final Set<String> requested = returnFields.getLuceneFieldNames();
    if (returnFields.wantsAllFields()) {
      return null; // null stands for "every stored field"
    }

    final Set<String> selected = new HashSet<>();
    if (returnFields.hasPatternMatching()) {
      // globs were used: keep each stored field that the request matches
      for (String candidate : docFetcher.getAllStored()) {
        if (returnFields.wantsField(candidate)) {
          selected.add(candidate);
        }
      }
    } else if (requested != null) {
      selected.addAll(requested);
    }
    // the score is never read from the stored document
    selected.remove(SolrReturnFields.SCORE);
    return selected;
  }

  /**
   * Computes the non-stored docValues field names to fetch.
   *
   * @return a mutable, possibly empty set; never {@code null}
   */
  private static Set<String> calcDocValueFieldsForReturn(SolrDocumentFetcher docFetcher, ReturnFields returnFields) {
    final Set<String> selected = new HashSet<>();

    if (returnFields.wantsAllFields()) {
      selected.addAll(docFetcher.getNonStoredDVs(true));
      // look for fields that were additionally named explicitly
      final Set<String> explicitlyNamed = returnFields.getLuceneFieldNames(true);
      if (explicitlyNamed != null) {
        // an explicitly named field is returned even if it may be useDocValuesAsStored=false
        for (String name : explicitlyNamed) {
          if (docFetcher.getNonStoredDVs(false).contains(name)) {
            selected.add(name);
          }
        }
      }
      return selected;
    }

    if (returnFields.hasPatternMatching()) {
      for (String candidate : docFetcher.getNonStoredDVs(true)) {
        if (returnFields.wantsField(candidate)) {
          selected.add(candidate);
        }
      }
      return selected;
    }

    final Set<String> requested = returnFields.getLuceneFieldNames();
    if (requested != null) {
      selected.addAll(requested);
      // All non-stored docValues fields are considered here: even when the schema sets
      // useDocValuesAsStored=false, the field may have been requested explicitly through
      // the fl parameter.
      selected.retainAll(docFetcher.getNonStoredDVs(false));
    }
    return selected;
  }
 }
--- a/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java
@ -80,6 +80,10 @@ public class SolrDocumentFetcher {
  private final SolrCache<Integer,Document> documentCache;
  private final Set<String> allStored;
  private final Set<String> allSingleDV;
  /** Contains the names/patterns of all docValues=true,stored=false fields in the schema. */
  private final Set<String> allNonStoredDVs;
@ -108,12 +112,20 @@ public class SolrDocumentFetcher {
    final Set<String> allNonStoredDVs = new HashSet<>();
    final Set<String> nonStoredDVsWithoutCopyTargets = new HashSet<>();
    final Set<String> storedLargeFields = new HashSet<>();
    final Set<String> allSingleDVs = new HashSet<>();
    final Set<String> allStoreds = new HashSet<>();
    for (FieldInfo fieldInfo : searcher.getFieldInfos()) { // can find materialized dynamic fields, unlike using the Solr IndexSchema.
      final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldInfo.name);
      if (schemaField == null) {
        continue;
      }
      if (schemaField.hasDocValues() && !schemaField.multiValued()) {
        allSingleDVs.add(fieldInfo.name);
      }
      if (schemaField.stored()) {
        allStoreds.add(fieldInfo.name);
      }
      if (!schemaField.stored() && schemaField.hasDocValues()) {
        if (schemaField.useDocValuesAsStored()) {
          nonStoredDVsUsedAsStored.add(fieldInfo.name);
@ -132,6 +144,8 @@ public class SolrDocumentFetcher {
    this.allNonStoredDVs = Collections.unmodifiableSet(allNonStoredDVs);
    this.nonStoredDVsWithoutCopyTargets = Collections.unmodifiableSet(nonStoredDVsWithoutCopyTargets);
    this.largeFields = Collections.unmodifiableSet(storedLargeFields);
    this.allSingleDV = Collections.unmodifiableSet(allSingleDVs);
    this.allStored = Collections.unmodifiableSet(allStoreds);
  }
  public boolean isLazyFieldLoadingEnabled() {
@ -412,7 +426,7 @@ public class SolrDocumentFetcher {
    final LeafReader leafReader = leafContexts.get(subIndex).reader();
    for (String fieldName : fields) {
      final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
-      if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
+      if (schemaField == null || !schemaField.hasDocValues()) {
        log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
        continue;
      }
@ -420,6 +434,7 @@ public class SolrDocumentFetcher {
      if (fi == null) {
        continue; // Searcher doesn't have info about this field, hence ignore it.
      }
      doc.remove(fieldName);
      final DocValuesType dvType = fi.getDocValuesType();
      switch (dvType) {
        case NUMERIC:
@ -555,6 +570,14 @@ public class SolrDocumentFetcher {
    }
  }
  /**
   * Returns an unmodifiable set with the names of the fields known to the searcher whose
   * schema field has docValues and is not multi-valued.
   */
  public Set<String> getAllSingleDV() {
    return this.allSingleDV;
  }
  /**
   * Returns an unmodifiable set with the names of the fields known to the searcher whose
   * schema field is stored.
   */
  public Set<String> getAllStored() {
    return this.allStored;
  }
  /**
   * Returns an unmodifiable set of non-stored docValues field names.
   *
--- a/solr/core/src/test/org/apache/solr/response/TestRetrieveFieldsOptimizer.java
+++ b/solr/core/src/test/org/apache/solr/response/TestRetrieveFieldsOptimizer.java
@ -0,0 +1,56 @@
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.solr.response;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
 import org.apache.solr.SolrTestCaseJ4;
 import org.junit.Test;
 /** Unit test for the stored-versus-docValues decision made by {@link RetrieveFieldsOptimizer}. */
 public class TestRetrieveFieldsOptimizer extends SolrTestCaseJ4{
  @Test
  public void testOptimizer() {
    // Every requested stored field is also a single-valued docValues field:
    // all of them move to docValues and the stored document is not needed.
    HashSet<String> requestedStored = new HashSet<>(Arrays.asList("id", "title"));
    HashSet<String> requestedDocValues = new HashSet<>();
    RetrieveFieldsOptimizer optimizer = new RetrieveFieldsOptimizer(requestedStored, requestedDocValues);
    HashSet<String> singleValuedDocValues = new HashSet<>(Arrays.asList("id", "title"));
    optimizer.optimize(singleValuedDocValues);
    assertTrue(optimizer.returnDVFields());
    assertFalse(optimizer.returnStoredFields());

    // Only one of the two stored fields has docValues: nothing is moved,
    // so both still come from the stored document.
    requestedStored = new HashSet<>(Arrays.asList("id", "title"));
    requestedDocValues = new HashSet<>();
    optimizer = new RetrieveFieldsOptimizer(requestedStored, requestedDocValues);
    singleValuedDocValues = new HashSet<>(Collections.singletonList("title"));
    optimizer.optimize(singleValuedDocValues);
    assertFalse(optimizer.returnDVFields());
    assertTrue(optimizer.returnStoredFields());

    // A null stored set means "all stored fields": optimizing leaves both groups untouched.
    requestedDocValues = new HashSet<>(Collections.singletonList("id"));
    optimizer = new RetrieveFieldsOptimizer(null, requestedDocValues);
    singleValuedDocValues = new HashSet<>(Collections.singletonList("id"));
    optimizer.optimize(singleValuedDocValues);
    assertNull(optimizer.getStoredFields());
    assertTrue(optimizer.getDvFields().contains("id"));
  }
 }