SOLR-8344: Decide default when requested fields are both column and row stored.

This commit is contained in:
Cao Manh Dat 2017-09-19 06:02:14 +07:00
parent 2e5f9a4369
commit 40f78dd274
5 changed files with 230 additions and 72 deletions

View File

@ -145,6 +145,8 @@ Optimizations
* SOLR-11314: FastCharStream: re-use the READ_PAST_EOF exception. (Michael Braun via David Smiley) * SOLR-11314: FastCharStream: re-use the READ_PAST_EOF exception. (Michael Braun via David Smiley)
* SOLR-8344: Decide default when requested fields are both column and row stored. (Cao Manh Dat, David Smiley)
Other Changes Other Changes
---------------------- ----------------------

View File

@ -49,9 +49,7 @@ import org.apache.solr.schema.TrieIntField;
import org.apache.solr.schema.TrieLongField; import org.apache.solr.schema.TrieLongField;
import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList; import org.apache.solr.search.DocList;
import org.apache.solr.search.ReturnFields;
import org.apache.solr.search.SolrDocumentFetcher; import org.apache.solr.search.SolrDocumentFetcher;
import org.apache.solr.search.SolrReturnFields;
/** /**
* This streams SolrDocuments from a DocList and applies transformer * This streams SolrDocuments from a DocList and applies transformer
@ -66,9 +64,7 @@ public class DocsStreamer implements Iterator<SolrDocument> {
private final DocTransformer transformer; private final DocTransformer transformer;
private final DocIterator docIterator; private final DocIterator docIterator;
private final Set<String> fnames; // returnFields.getLuceneFieldNames(). Maybe null. Not empty. private final RetrieveFieldsOptimizer retrieveFieldsOptimizer;
private final boolean onlyPseudoFields;
private final Set<String> dvFieldsToReturn; // maybe null. Not empty.
private int idx = -1; private int idx = -1;
@ -77,63 +73,13 @@ public class DocsStreamer implements Iterator<SolrDocument> {
this.docs = rctx.getDocList(); this.docs = rctx.getDocList();
transformer = rctx.getReturnFields().getTransformer(); transformer = rctx.getReturnFields().getTransformer();
docIterator = this.docs.iterator(); docIterator = this.docs.iterator();
fnames = rctx.getReturnFields().getLuceneFieldNames();
//TODO move onlyPseudoFields calc to ReturnFields
onlyPseudoFields = (fnames == null && !rctx.getReturnFields().wantsAllFields() && !rctx.getReturnFields().hasPatternMatching())
|| (fnames != null && fnames.size() == 1 && SolrReturnFields.SCORE.equals(fnames.iterator().next()));
// add non-stored DV fields that may have been requested
docFetcher = rctx.getSearcher().getDocFetcher(); docFetcher = rctx.getSearcher().getDocFetcher();
dvFieldsToReturn = calcDocValueFieldsForReturn(docFetcher, rctx.getReturnFields());
retrieveFieldsOptimizer = RetrieveFieldsOptimizer.create(docFetcher, rctx.getReturnFields());
retrieveFieldsOptimizer.optimize(docFetcher);
if (transformer != null) transformer.setContext(rctx); if (transformer != null) transformer.setContext(rctx);
} }
// TODO move to ReturnFields ? Or SolrDocumentFetcher ?
/**
 * Works out which non-stored docValues fields have to be added to each returned document.
 *
 * @param docFetcher   supplies the sets of non-stored docValues field names
 * @param returnFields describes what the request asked to have returned
 * @return the docValues field names to decorate documents with, or {@code null} when there
 *         are none (an empty set is never returned)
 */
public static Set<String> calcDocValueFieldsForReturn(SolrDocumentFetcher docFetcher, ReturnFields returnFields) {
  final Set<String> dvNames;
  if (returnFields.wantsAllFields()) {
    // are there additional, explicitly named fields on top of "all fields"?
    final Set<String> explicitNames = returnFields.getLuceneFieldNames(true);
    if (explicitNames != null) {
      dvNames = new HashSet<>(docFetcher.getNonStoredDVs(true)); // copy, so we can add to it
      // explicitly requested fields may be useDocValuesAsStored=false, so look them up
      // in the unfiltered set
      for (String name : explicitNames) {
        if (docFetcher.getNonStoredDVs(false).contains(name)) {
          dvNames.add(name);
        }
      }
    } else {
      // nothing extra requested: hand back the fetcher's own set as is
      dvNames = docFetcher.getNonStoredDVs(true);
    }
  } else if (returnFields.hasPatternMatching()) {
    dvNames = new HashSet<>();
    for (String candidate : docFetcher.getNonStoredDVs(true)) {
      if (returnFields.wantsField(candidate)) {
        dvNames.add(candidate);
      }
    }
  } else {
    final Set<String> requested = returnFields.getLuceneFieldNames();
    if (requested == null) {
      return null;
    }
    dvNames = new HashSet<>(requested); // copy
    // keep every requested non-stored docValues field: even if useDocValuesAsStored=false
    // is set in the schema, the user may have asked for the field explicitly via fl
    dvNames.retainAll(docFetcher.getNonStoredDVs(false));
  }
  // callers rely on "null" meaning "nothing to decorate"
  return (dvNames == null || dvNames.isEmpty()) ? null : dvNames;
}
public int currentIndex() { public int currentIndex() {
return idx; return idx;
} }
@ -147,21 +93,22 @@ public class DocsStreamer implements Iterator<SolrDocument> {
idx++; idx++;
SolrDocument sdoc = null; SolrDocument sdoc = null;
if (onlyPseudoFields) { try {
// no need to get stored fields of the document, see SOLR-5968 if (retrieveFieldsOptimizer.returnStoredFields()) {
sdoc = new SolrDocument(); Document doc = docFetcher.doc(id, retrieveFieldsOptimizer.getStoredFields());
} else { // make sure to use the schema from the searcher and not the request (cross-core)
try { sdoc = convertLuceneDocToSolrDoc(doc, rctx.getSearcher().getSchema());
Document doc = docFetcher.doc(id, fnames); } else {
sdoc = convertLuceneDocToSolrDoc(doc, rctx.getSearcher().getSchema()); // make sure to use the schema from the searcher and not the request (cross-core) // no need to get stored fields of the document, see SOLR-5968
sdoc = new SolrDocument();
// decorate the document with non-stored docValues fields
if (dvFieldsToReturn != null) {
docFetcher.decorateDocValueFields(sdoc, id, dvFieldsToReturn);
}
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error reading document with docId " + id, e);
} }
// decorate the document with non-stored docValues fields
if (retrieveFieldsOptimizer.returnDVFields()) {
docFetcher.decorateDocValueFields(sdoc, id, retrieveFieldsOptimizer.getDvFields());
}
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error reading document with docId " + id, e);
} }
if (transformer != null) { if (transformer != null) {

View File

@ -0,0 +1,130 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.response;
import java.util.HashSet;
import java.util.Set;
import org.apache.solr.search.ReturnFields;
import org.apache.solr.search.SolrDocumentFetcher;
import org.apache.solr.search.SolrReturnFields;
/**
 * Holds, for one set of requested return fields, the field names to read from the stored
 * document and the field names to read from docValues, and can shift fields from the former
 * to the latter when that makes the stored document unnecessary.
 */
public class RetrieveFieldsOptimizer {
  /** Names to read from the stored document; {@code null} means get all available stored fields. */
  private final Set<String> storedFields;

  /** Names to read from docValues; always non null. */
  private final Set<String> dvFields;

  /**
   * @param storedFields names to read from the stored document, or {@code null} for all of them;
   *                     the set is kept by reference and may be emptied by {@code optimize}
   * @param dvFields     names to read from docValues; kept by reference and may be added to
   */
  RetrieveFieldsOptimizer(Set<String> storedFields, Set<String> dvFields) {
    this.storedFields = storedFields;
    this.dvFields = dvFields;
  }

  /**
   * Some fields can be served either from the stored document or from docValues: they have
   * both and are single-valued. When switching to docValues lets us skip the stored document
   * completely for everything that is to be returned, we switch; in every other case the
   * stored value stays the preferred source.
   */
  void optimize(SolrDocumentFetcher docFetcher) {
    optimize(docFetcher.getAllSingleDV());
  }

  /**
   * Moves every stored field over to the docValues set, but only if all of them are contained
   * in {@code singleDVs}. Does nothing when all stored fields are wanted ({@code null} set).
   *
   * @param singleDVs names of the fields that may be read from docValues instead
   */
  void optimize(Set<String> singleDVs) {
    final boolean storedFullyCovered = storedFields != null && singleDVs.containsAll(storedFields);
    if (storedFullyCovered) {
      dvFields.addAll(storedFields);
      storedFields.clear();
    }
  }

  /** @return {@code false} only when the stored-field set is present and empty */
  boolean returnStoredFields() {
    return storedFields == null || !storedFields.isEmpty();
  }

  /** @return {@code true} when at least one field is to be read from docValues */
  boolean returnDVFields() {
    return !dvFields.isEmpty();
  }

  /** @return the stored-field names; {@code null} means all available stored fields */
  Set<String> getStoredFields() {
    return storedFields;
  }

  /** @return the docValues field names; never {@code null} */
  Set<String> getDvFields() {
    return dvFields;
  }

  /**
   * Builds an optimizer from the requested return fields. The stored-field set is computed
   * first, then the docValues set.
   */
  public static RetrieveFieldsOptimizer create(SolrDocumentFetcher docFetcher, ReturnFields returnFields) {
    return new RetrieveFieldsOptimizer(
        calcStoredFieldsForReturn(docFetcher, returnFields),
        calcDocValueFieldsForReturn(docFetcher, returnFields));
  }

  /**
   * Computes the names to read from the stored document.
   *
   * @return {@code null} when all fields are wanted, otherwise a fresh mutable set
   *         (possibly empty) that never contains the score pseudo-field
   */
  private static Set<String> calcStoredFieldsForReturn(SolrDocumentFetcher docFetcher, ReturnFields returnFields) {
    final Set<String> explicitNames = returnFields.getLuceneFieldNames();
    if (returnFields.wantsAllFields()) {
      return null;
    }

    final Set<String> wanted = new HashSet<>();
    if (returnFields.hasPatternMatching()) {
      // patterns can only be resolved against the stored fields the fetcher knows about
      for (String candidate : docFetcher.getAllStored()) {
        if (returnFields.wantsField(candidate)) {
          wanted.add(candidate);
        }
      }
    } else if (explicitNames != null) {
      wanted.addAll(explicitNames);
    }
    wanted.remove(SolrReturnFields.SCORE);
    return wanted;
  }

  /**
   * Computes the names of the non-stored docValues fields to return.
   *
   * @return a fresh mutable set, possibly empty, never {@code null}
   */
  private static Set<String> calcDocValueFieldsForReturn(SolrDocumentFetcher docFetcher, ReturnFields returnFields) {
    final Set<String> dvNames = new HashSet<>();

    if (returnFields.wantsAllFields()) {
      dvNames.addAll(docFetcher.getNonStoredDVs(true));
      // are there additional, explicitly named fields on top of "all fields"?
      final Set<String> explicitNames = returnFields.getLuceneFieldNames(true);
      if (explicitNames == null) {
        return dvNames;
      }
      // explicitly requested fields may be useDocValuesAsStored=false, so look them up
      // in the unfiltered set
      for (String name : explicitNames) {
        if (docFetcher.getNonStoredDVs(false).contains(name)) {
          dvNames.add(name);
        }
      }
      return dvNames;
    }

    if (returnFields.hasPatternMatching()) {
      for (String candidate : docFetcher.getNonStoredDVs(true)) {
        if (returnFields.wantsField(candidate)) {
          dvNames.add(candidate);
        }
      }
      return dvNames;
    }

    final Set<String> requested = returnFields.getLuceneFieldNames();
    if (requested != null) {
      dvNames.addAll(requested);
      // keep every requested non-stored docValues field: even if useDocValuesAsStored=false
      // is set in the schema, the user may have asked for the field explicitly via fl
      dvNames.retainAll(docFetcher.getNonStoredDVs(false));
    }
    return dvNames;
  }
}

View File

@ -80,6 +80,10 @@ public class SolrDocumentFetcher {
private final SolrCache<Integer,Document> documentCache; private final SolrCache<Integer,Document> documentCache;
private final Set<String> allStored;
private final Set<String> allSingleDV;
/** Contains the names/patterns of all docValues=true,stored=false fields in the schema. */ /** Contains the names/patterns of all docValues=true,stored=false fields in the schema. */
private final Set<String> allNonStoredDVs; private final Set<String> allNonStoredDVs;
@ -108,12 +112,20 @@ public class SolrDocumentFetcher {
final Set<String> allNonStoredDVs = new HashSet<>(); final Set<String> allNonStoredDVs = new HashSet<>();
final Set<String> nonStoredDVsWithoutCopyTargets = new HashSet<>(); final Set<String> nonStoredDVsWithoutCopyTargets = new HashSet<>();
final Set<String> storedLargeFields = new HashSet<>(); final Set<String> storedLargeFields = new HashSet<>();
final Set<String> allSingleDVs = new HashSet<>();
final Set<String> allStoreds = new HashSet<>();
for (FieldInfo fieldInfo : searcher.getFieldInfos()) { // can find materialized dynamic fields, unlike using the Solr IndexSchema. for (FieldInfo fieldInfo : searcher.getFieldInfos()) { // can find materialized dynamic fields, unlike using the Solr IndexSchema.
final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldInfo.name); final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldInfo.name);
if (schemaField == null) { if (schemaField == null) {
continue; continue;
} }
if (schemaField.hasDocValues() && !schemaField.multiValued()) {
allSingleDVs.add(fieldInfo.name);
}
if (schemaField.stored()) {
allStoreds.add(fieldInfo.name);
}
if (!schemaField.stored() && schemaField.hasDocValues()) { if (!schemaField.stored() && schemaField.hasDocValues()) {
if (schemaField.useDocValuesAsStored()) { if (schemaField.useDocValuesAsStored()) {
nonStoredDVsUsedAsStored.add(fieldInfo.name); nonStoredDVsUsedAsStored.add(fieldInfo.name);
@ -132,6 +144,8 @@ public class SolrDocumentFetcher {
this.allNonStoredDVs = Collections.unmodifiableSet(allNonStoredDVs); this.allNonStoredDVs = Collections.unmodifiableSet(allNonStoredDVs);
this.nonStoredDVsWithoutCopyTargets = Collections.unmodifiableSet(nonStoredDVsWithoutCopyTargets); this.nonStoredDVsWithoutCopyTargets = Collections.unmodifiableSet(nonStoredDVsWithoutCopyTargets);
this.largeFields = Collections.unmodifiableSet(storedLargeFields); this.largeFields = Collections.unmodifiableSet(storedLargeFields);
this.allSingleDV = Collections.unmodifiableSet(allSingleDVs);
this.allStored = Collections.unmodifiableSet(allStoreds);
} }
public boolean isLazyFieldLoadingEnabled() { public boolean isLazyFieldLoadingEnabled() {
@ -412,7 +426,7 @@ public class SolrDocumentFetcher {
final LeafReader leafReader = leafContexts.get(subIndex).reader(); final LeafReader leafReader = leafContexts.get(subIndex).reader();
for (String fieldName : fields) { for (String fieldName : fields) {
final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName); final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) { if (schemaField == null || !schemaField.hasDocValues()) {
log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField); log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
continue; continue;
} }
@ -420,6 +434,7 @@ public class SolrDocumentFetcher {
if (fi == null) { if (fi == null) {
continue; // Searcher doesn't have info about this field, hence ignore it. continue; // Searcher doesn't have info about this field, hence ignore it.
} }
doc.remove(fieldName);
final DocValuesType dvType = fi.getDocValuesType(); final DocValuesType dvType = fi.getDocValuesType();
switch (dvType) { switch (dvType) {
case NUMERIC: case NUMERIC:
@ -555,6 +570,14 @@ public class SolrDocumentFetcher {
} }
} }
public Set<String> getAllSingleDV() {
return allSingleDV;
}
public Set<String> getAllStored() {
return allStored;
}
/** /**
* Returns an unmodifiable set of non-stored docValues field names. * Returns an unmodifiable set of non-stored docValues field names.
* *

View File

@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.response;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.Test;
/** Unit test for the stored-versus-docValues decision made by {@link RetrieveFieldsOptimizer}. */
public class TestRetrieveFieldsOptimizer extends SolrTestCaseJ4 {

  @Test
  public void testOptimizer() {
    // Every requested stored field is also a single-valued docValues field:
    // everything moves to docValues and the stored document is not needed.
    HashSet<String> requestedStored = new HashSet<>(Arrays.asList("id", "title"));
    HashSet<String> requestedDv = new HashSet<>();
    HashSet<String> singleDvs = new HashSet<>(Arrays.asList("id", "title"));
    RetrieveFieldsOptimizer optimizer = new RetrieveFieldsOptimizer(requestedStored, requestedDv);
    optimizer.optimize(singleDvs);
    assertTrue(optimizer.returnDVFields());
    assertFalse(optimizer.returnStoredFields());

    // Only some of the stored fields have docValues: nothing is moved,
    // the stored document remains the source.
    requestedStored = new HashSet<>(Arrays.asList("id", "title"));
    requestedDv = new HashSet<>();
    singleDvs = new HashSet<>(Collections.singleton("title"));
    optimizer = new RetrieveFieldsOptimizer(requestedStored, requestedDv);
    optimizer.optimize(singleDvs);
    assertFalse(optimizer.returnDVFields());
    assertTrue(optimizer.returnStoredFields());

    // A null stored set (all stored fields wanted) is left alone and the
    // docValues set keeps what it was given.
    requestedDv = new HashSet<>(Collections.singleton("id"));
    singleDvs = new HashSet<>(Collections.singleton("id"));
    optimizer = new RetrieveFieldsOptimizer(null, requestedDv);
    optimizer.optimize(singleDvs);
    assertNull(optimizer.getStoredFields());
    assertTrue(optimizer.getDvFields().contains("id"));
  }
}