SOLR-8344: Decide default when requested fields are both column and row stored.

2017-09-19 06:02:14 +07:00 · 2017-09-19 06:02:14 +07:00 · 40f78dd274
parent 2e5f9a4369
commit 40f78dd274
5 changed files with 230 additions and 72 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -145,6 +145,8 @@ Optimizations

 * SOLR-11314: FastCharStream: re-use the READ_PAST_EOF exception. (Michael Braun via David Smiley)

+* SOLR-8344: Decide default when requested fields are both column and row stored. (Cao Manh Dat, David Smiley)
+
 Other Changes
 ----------------------

--- a/solr/core/src/java/org/apache/solr/response/DocsStreamer.java
+++ b/solr/core/src/java/org/apache/solr/response/DocsStreamer.java
@ -49,9 +49,7 @@ import org.apache.solr.schema.TrieIntField;
 import org.apache.solr.schema.TrieLongField;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocList;
-import org.apache.solr.search.ReturnFields;
 import org.apache.solr.search.SolrDocumentFetcher;
-import org.apache.solr.search.SolrReturnFields;

 /**
 * This streams SolrDocuments from a DocList and applies transformer
@ -66,9 +64,7 @@ public class DocsStreamer implements Iterator<SolrDocument> {
  private final DocTransformer transformer;
  private final DocIterator docIterator;

-  private final Set<String> fnames; // returnFields.getLuceneFieldNames(). Maybe null. Not empty.
-  private final boolean onlyPseudoFields;
-  private final Set<String> dvFieldsToReturn; // maybe null. Not empty.
+  private final RetrieveFieldsOptimizer retrieveFieldsOptimizer;

  private int idx = -1;

@ -77,63 +73,13 @@ public class DocsStreamer implements Iterator<SolrDocument> {
    this.docs = rctx.getDocList();
    transformer = rctx.getReturnFields().getTransformer();
    docIterator = this.docs.iterator();
-    fnames = rctx.getReturnFields().getLuceneFieldNames();
-    //TODO move onlyPseudoFields calc to ReturnFields
-    onlyPseudoFields = (fnames == null && !rctx.getReturnFields().wantsAllFields() && !rctx.getReturnFields().hasPatternMatching())
-        || (fnames != null && fnames.size() == 1 && SolrReturnFields.SCORE.equals(fnames.iterator().next()));
-
-    // add non-stored DV fields that may have been requested
    docFetcher = rctx.getSearcher().getDocFetcher();
-    dvFieldsToReturn = calcDocValueFieldsForReturn(docFetcher, rctx.getReturnFields());

+    retrieveFieldsOptimizer = RetrieveFieldsOptimizer.create(docFetcher, rctx.getReturnFields());
+    retrieveFieldsOptimizer.optimize(docFetcher);
    if (transformer != null) transformer.setContext(rctx);
  }

-  // TODO move to ReturnFields ?  Or SolrDocumentFetcher ?
-  public static Set<String> calcDocValueFieldsForReturn(SolrDocumentFetcher docFetcher, ReturnFields returnFields) {
-    Set<String> result = null;
-    if (returnFields.wantsAllFields()) {
-      // check whether there are no additional fields
-      Set<String> fieldNames = returnFields.getLuceneFieldNames(true);
-      if (fieldNames == null) {
-        result = docFetcher.getNonStoredDVs(true);
-      } else {
-        result = new HashSet<>(docFetcher.getNonStoredDVs(true)); // copy
-        // add all requested fields that may be useDocValuesAsStored=false
-        for (String fl : fieldNames) {
-          if (docFetcher.getNonStoredDVs(false).contains(fl)) {
-            result.add(fl);
-          }
-        }
-      }
-    } else {
-      if (returnFields.hasPatternMatching()) {
-        for (String s : docFetcher.getNonStoredDVs(true)) {
-          if (returnFields.wantsField(s)) {
-            if (null == result) {
-              result = new HashSet<>();
-            }
-            result.add(s);
-          }
-        }
-      } else {
-        Set<String> fnames = returnFields.getLuceneFieldNames();
-        if (fnames == null) {
-          return null;
-        }
-        result = new HashSet<>(fnames); // copy
-        // here we get all non-stored dv fields because even if a user has set
-        // useDocValuesAsStored=false in schema, he may have requested a field
-        // explicitly using the fl parameter
-        result.retainAll(docFetcher.getNonStoredDVs(false));
-      }
-    }
-    if (result != null && result.isEmpty()) {
-      return null;
-    }
-    return result;
-  }
-
  public int currentIndex() {
    return idx;
  }
@ -147,21 +93,22 @@ public class DocsStreamer implements Iterator<SolrDocument> {
    idx++;
    SolrDocument sdoc = null;

-    if (onlyPseudoFields) {
-      // no need to get stored fields of the document, see SOLR-5968
-      sdoc = new SolrDocument();
-    } else {
-      try {
-        Document doc = docFetcher.doc(id, fnames);
-        sdoc = convertLuceneDocToSolrDoc(doc, rctx.getSearcher().getSchema()); // make sure to use the schema from the searcher and not the request (cross-core)
-
-        // decorate the document with non-stored docValues fields
-        if (dvFieldsToReturn != null) {
-          docFetcher.decorateDocValueFields(sdoc, id, dvFieldsToReturn);
-        }
-      } catch (IOException e) {
-        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error reading document with docId " + id, e);
+    try {
+      if (retrieveFieldsOptimizer.returnStoredFields()) {
+        Document doc = docFetcher.doc(id, retrieveFieldsOptimizer.getStoredFields());
+        // make sure to use the schema from the searcher and not the request (cross-core)
+        sdoc = convertLuceneDocToSolrDoc(doc, rctx.getSearcher().getSchema());
+      } else {
+        // no need to get stored fields of the document, see SOLR-5968
+        sdoc = new SolrDocument();
      }
+
+      // decorate the document with non-stored docValues fields
+      if (retrieveFieldsOptimizer.returnDVFields()) {
+        docFetcher.decorateDocValueFields(sdoc, id, retrieveFieldsOptimizer.getDvFields());
+      }
+    } catch (IOException e) {
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error reading document with docId " + id, e);
    }

    if (transformer != null) {
--- a/solr/core/src/java/org/apache/solr/response/RetrieveFieldsOptimizer.java
+++ b/solr/core/src/java/org/apache/solr/response/RetrieveFieldsOptimizer.java
@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.response;
+
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.solr.search.ReturnFields;
+import org.apache.solr.search.SolrDocumentFetcher;
+import org.apache.solr.search.SolrReturnFields;
+
+/**
+ * Works out, for a single request, which of the requested fields are read from the stored
+ * document and which are read from docValues.
+ *
+ * <p>Instances are built by {@link #create(SolrDocumentFetcher, ReturnFields)} and are mutable:
+ * {@link #optimize(Set)} may move every name from the stored set into the docValues set.
+ */
+public class RetrieveFieldsOptimizer {
+  // null means get all available stored fields
+  private final Set<String> storedFields;
+  // never null (enforced by the constructor); optimize() may add names to it
+  private final Set<String> dvFields;
+
+  /**
+   * @param storedFields names to read from the stored document; {@code null} means all available
+   *                     stored fields, an empty set means none. May be emptied in place by
+   *                     {@link #optimize(Set)}, so it must be mutable when non-null.
+   * @param dvFields names to read from docValues; must be mutable and must not be {@code null}
+   * @throws NullPointerException if {@code dvFields} is {@code null}
+   */
+  RetrieveFieldsOptimizer(Set<String> storedFields, Set<String> dvFields) {
+    this.storedFields = storedFields;
+    // Fail here rather than later inside returnDVFields()/optimize(), far from the cause.
+    this.dvFields = java.util.Objects.requireNonNull(dvFields, "dvFields");
+  }
+
+  /**
+   * Sometimes we could fetch a field value from either the stored document or docValues.
+   * Such fields have both and are single-valued.
+   * If choosing docValues allows us to avoid accessing the stored document altogether
+   * for all fields to be returned then we do it,
+   * otherwise we prefer the stored value when we have a choice.
+   *
+   * @param docFetcher supplies the set of single-valued docValues field names
+   */
+  void optimize(SolrDocumentFetcher docFetcher) {
+    optimize(docFetcher.getAllSingleDV());
+  }
+
+  /**
+   * Moves all stored field names into the docValues set, but only if every one of them is
+   * contained in {@code singleDVs}. It is all-or-nothing: if a single stored field is missing
+   * from {@code singleDVs} the stored document has to be read anyway, so nothing is moved.
+   *
+   * @param singleDVs names of fields whose value can be read from docValues instead
+   */
+  void optimize(Set<String> singleDVs) {
+    if (storedFields == null) {
+      return; // all stored fields were requested; the stored document is needed regardless
+    }
+    if (!singleDVs.containsAll(storedFields)) {
+      return;
+    }
+    dvFields.addAll(storedFields);
+    storedFields.clear();
+  }
+
+  /**
+   * @return {@code false} only when the stored field set is non-null and empty, i.e. when
+   *         there is nothing to read from the stored document
+   */
+  boolean returnStoredFields() {
+    return storedFields == null || !storedFields.isEmpty();
+  }
+
+  /** @return {@code true} if at least one field has to be read from docValues */
+  boolean returnDVFields() {
+    return !dvFields.isEmpty();
+  }
+
+  /** @return the stored field names; {@code null} means all available stored fields */
+  Set<String> getStoredFields() {
+    return storedFields;
+  }
+
+  /** @return the docValues field names; never {@code null} */
+  Set<String> getDvFields() {
+    return dvFields;
+  }
+
+  /**
+   * Builds an optimizer from the requested return fields. The result is not optimized yet;
+   * callers that want the docValues-only shortcut still have to call
+   * {@link #optimize(SolrDocumentFetcher)}.
+   *
+   * @param docFetcher source of the stored and docValues field name sets
+   * @param returnFields the fields requested for the response
+   * @return a new, mutable optimizer
+   */
+  public static RetrieveFieldsOptimizer create(SolrDocumentFetcher docFetcher, ReturnFields returnFields) {
+    Set<String> storedFields = calcStoredFieldsForReturn(docFetcher, returnFields);
+    Set<String> dvFields = calcDocValueFieldsForReturn(docFetcher, returnFields);
+
+    return new RetrieveFieldsOptimizer(storedFields, dvFields);
+  }
+
+  /**
+   * Computes the names to read from the stored document.
+   *
+   * @return {@code null} when all fields are wanted; otherwise a fresh mutable set (possibly
+   *         empty) that never contains {@link SolrReturnFields#SCORE}
+   */
+  private static Set<String> calcStoredFieldsForReturn(SolrDocumentFetcher docFetcher, ReturnFields returnFields) {
+    if (returnFields.wantsAllFields()) {
+      // decided before allocating anything: null is the "all stored fields" marker
+      return null;
+    }
+    final Set<String> storedFields = new HashSet<>();
+    if (returnFields.hasPatternMatching()) {
+      for (String s : docFetcher.getAllStored()) {
+        if (returnFields.wantsField(s)) {
+          storedFields.add(s);
+        }
+      }
+    } else {
+      Set<String> fnames = returnFields.getLuceneFieldNames();
+      if (fnames != null) {
+        storedFields.addAll(fnames);
+      }
+    }
+    storedFields.remove(SolrReturnFields.SCORE);
+    return storedFields;
+  }
+
+  /**
+   * Computes the names of non-stored docValues fields that have to be added to the response.
+   *
+   * @return a fresh mutable set, possibly empty, never {@code null}
+   */
+  private static Set<String> calcDocValueFieldsForReturn(SolrDocumentFetcher docFetcher, ReturnFields returnFields) {
+    // always return not null
+    final Set<String> result = new HashSet<>();
+    if (returnFields.wantsAllFields()) {
+      result.addAll(docFetcher.getNonStoredDVs(true));
+      // check whether there are no additional fields
+      Set<String> fieldNames = returnFields.getLuceneFieldNames(true);
+      if (fieldNames != null) {
+        // add all requested fields that may be useDocValuesAsStored=false
+        final Set<String> allNonStoredDVs = docFetcher.getNonStoredDVs(false); // loop-invariant
+        for (String fl : fieldNames) {
+          if (allNonStoredDVs.contains(fl)) {
+            result.add(fl);
+          }
+        }
+      }
+    } else if (returnFields.hasPatternMatching()) {
+      for (String s : docFetcher.getNonStoredDVs(true)) {
+        if (returnFields.wantsField(s)) {
+          result.add(s);
+        }
+      }
+    } else {
+      Set<String> fnames = returnFields.getLuceneFieldNames();
+      if (fnames != null) {
+        result.addAll(fnames);
+        // here we get all non-stored dv fields because even if a user has set
+        // useDocValuesAsStored=false in schema, they may have requested a field
+        // explicitly using the fl parameter
+        result.retainAll(docFetcher.getNonStoredDVs(false));
+      }
+    }
+    return result;
+  }
+}
--- a/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java
@ -80,6 +80,10 @@ public class SolrDocumentFetcher {

  private final SolrCache<Integer,Document> documentCache;

+  private final Set<String> allStored;
+
+  private final Set<String> allSingleDV;
+
  /** Contains the names/patterns of all docValues=true,stored=false fields in the schema. */
  private final Set<String> allNonStoredDVs;

@ -108,12 +112,20 @@ public class SolrDocumentFetcher {
    final Set<String> allNonStoredDVs = new HashSet<>();
    final Set<String> nonStoredDVsWithoutCopyTargets = new HashSet<>();
    final Set<String> storedLargeFields = new HashSet<>();
+    final Set<String> allSingleDVs = new HashSet<>();
+    final Set<String> allStoreds = new HashSet<>();

    for (FieldInfo fieldInfo : searcher.getFieldInfos()) { // can find materialized dynamic fields, unlike using the Solr IndexSchema.
      final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldInfo.name);
      if (schemaField == null) {
        continue;
      }
+      if (schemaField.hasDocValues() && !schemaField.multiValued()) {
+        allSingleDVs.add(fieldInfo.name);
+      }
+      if (schemaField.stored()) {
+        allStoreds.add(fieldInfo.name);
+      }
      if (!schemaField.stored() && schemaField.hasDocValues()) {
        if (schemaField.useDocValuesAsStored()) {
          nonStoredDVsUsedAsStored.add(fieldInfo.name);
@ -132,6 +144,8 @@ public class SolrDocumentFetcher {
    this.allNonStoredDVs = Collections.unmodifiableSet(allNonStoredDVs);
    this.nonStoredDVsWithoutCopyTargets = Collections.unmodifiableSet(nonStoredDVsWithoutCopyTargets);
    this.largeFields = Collections.unmodifiableSet(storedLargeFields);
+    this.allSingleDV = Collections.unmodifiableSet(allSingleDVs);
+    this.allStored = Collections.unmodifiableSet(allStoreds);
  }

  public boolean isLazyFieldLoadingEnabled() {
@ -412,7 +426,7 @@ public class SolrDocumentFetcher {
    final LeafReader leafReader = leafContexts.get(subIndex).reader();
    for (String fieldName : fields) {
      final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
-      if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
+      if (schemaField == null || !schemaField.hasDocValues()) {
        log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
        continue;
      }
@ -420,6 +434,7 @@ public class SolrDocumentFetcher {
      if (fi == null) {
        continue; // Searcher doesn't have info about this field, hence ignore it.
      }
+      doc.remove(fieldName);
      final DocValuesType dvType = fi.getDocValuesType();
      switch (dvType) {
        case NUMERIC:
@ -555,6 +570,14 @@ public class SolrDocumentFetcher {
    }
  }

+  /**
+   * Returns an unmodifiable set of the names of fields that have docValues and are not
+   * multiValued according to the schema. Only fields present in the searcher's field infos
+   * and known to the schema are included.
+   */
+  public Set<String> getAllSingleDV() {
+    return allSingleDV;
+  }
+
+  /**
+   * Returns an unmodifiable set of the names of fields that are stored according to the schema.
+   * Only fields present in the searcher's field infos and known to the schema are included.
+   */
+  public Set<String> getAllStored() {
+    return allStored;
+  }
+
  /**
   * Returns an unmodifiable set of non-stored docValues field names.
   *
--- a/solr/core/src/test/org/apache/solr/response/TestRetrieveFieldsOptimizer.java
+++ b/solr/core/src/test/org/apache/solr/response/TestRetrieveFieldsOptimizer.java
@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.response;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.junit.Test;
+
+/** Unit test for the stored-versus-docValues decision made by {@link RetrieveFieldsOptimizer}. */
+public class TestRetrieveFieldsOptimizer extends SolrTestCaseJ4{
+
+  /**
+   * Runs the optimizer against three combinations of requested stored fields and
+   * single-valued docValues fields and checks where the fields end up.
+   */
+  @Test
+  public void testOptimizer() {
+    // Every requested stored field is also a single-valued docValues field:
+    // everything moves to docValues and the stored document is not needed.
+    RetrieveFieldsOptimizer fullyCovered =
+        new RetrieveFieldsOptimizer(setOf("id", "title"), new HashSet<>());
+    fullyCovered.optimize(setOf("id", "title"));
+    assertTrue(fullyCovered.returnDVFields());
+    assertFalse(fullyCovered.returnStoredFields());
+
+    // Only one of the two requested fields has docValues: nothing is moved.
+    RetrieveFieldsOptimizer partlyCovered =
+        new RetrieveFieldsOptimizer(setOf("id", "title"), new HashSet<>());
+    partlyCovered.optimize(new HashSet<>(Collections.singleton("title")));
+    assertFalse(partlyCovered.returnDVFields());
+    assertTrue(partlyCovered.returnStoredFields());
+
+    // null stored fields (all stored fields requested): optimize leaves both sets untouched.
+    RetrieveFieldsOptimizer allStored =
+        new RetrieveFieldsOptimizer(null, new HashSet<>(Collections.singleton("id")));
+    allStored.optimize(new HashSet<>(Collections.singleton("id")));
+    assertNull(allStored.getStoredFields());
+    assertTrue(allStored.getDvFields().contains("id"));
+  }
+
+  /** Builds a fresh mutable set holding the given field names. */
+  private static HashSet<String> setOf(String... fieldNames) {
+    return new HashSet<>(Arrays.asList(fieldNames));
+  }
+}