mirror of https://github.com/apache/lucene.git
SOLR-11891: DocStreamer now respects the ReturnFields when populating a SolrDocument
This is an optimization that reduces the number of unneccessary fields a ResponseWriter will see if documentCache is used This commit also includes fixes for SOLR-12107 & SOLR-12108 -- two bugs that were previously dependent on the un-optimized behavior of DocStreamer in order to function properly. - SOLR-12107: Fixed a error in [child] transformer that could ocur if documentCache was not used - SOLR-12108: Fixed the fallback behavior of [raw] and [xml] transformers when an incompatble 'wt' was specified, the field value was lost if documentCache was not used.
This commit is contained in:
parent
d6ed71b5c4
commit
8bd7e5c9d2
|
@ -63,6 +63,11 @@ Bug Fixes
|
|||
|
||||
* SOLR-12103: Raise CryptoKeys.DEFAULT_KEYPAIR_LENGTH from 1024 to 2048. (Mark Miller)
|
||||
|
||||
* SOLR-12107: Fixed a error in [child] transformer that could ocur if documentCache was not used (hossman)
|
||||
|
||||
* SOLR-12108: Fixed the fallback behavior of [raw] and [xml] transformers when an incompatble 'wt' was
|
||||
specified, the field value was lost if documentCache was not used. (hossman)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
@ -73,6 +78,9 @@ Optimizations
|
|||
* SOLR-11731: LatLonPointSpatialField can now decode points from docValues when stored=false docValues=true,
|
||||
albeit with maximum precision of 1.33cm (Karthik Ramachandran, David Smiley)
|
||||
|
||||
* SOLR-11891: DocStreamer now respects the ReturnFields when populating a SolrDocument, reducing the
|
||||
number of unneccessary fields a ResponseWriter will see if documentCache is used (wei wang, hossman)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -50,6 +50,8 @@ import org.apache.solr.schema.TrieLongField;
|
|||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.SolrDocumentFetcher;
|
||||
import org.apache.solr.search.ReturnFields;
|
||||
import org.apache.solr.search.SolrReturnFields;
|
||||
|
||||
/**
|
||||
* This streams SolrDocuments from a DocList and applies transformer
|
||||
|
@ -97,7 +99,8 @@ public class DocsStreamer implements Iterator<SolrDocument> {
|
|||
if (retrieveFieldsOptimizer.returnStoredFields()) {
|
||||
Document doc = docFetcher.doc(id, retrieveFieldsOptimizer.getStoredFields());
|
||||
// make sure to use the schema from the searcher and not the request (cross-core)
|
||||
sdoc = convertLuceneDocToSolrDoc(doc, rctx.getSearcher().getSchema());
|
||||
sdoc = convertLuceneDocToSolrDoc(doc, rctx.getSearcher().getSchema(),
|
||||
rctx.getReturnFields());
|
||||
} else {
|
||||
// no need to get stored fields of the document, see SOLR-5968
|
||||
sdoc = new SolrDocument();
|
||||
|
@ -127,23 +130,67 @@ public class DocsStreamer implements Iterator<SolrDocument> {
|
|||
|
||||
}
|
||||
|
||||
// TODO move to SolrDocumentFetcher ? Refactor to also call docFetcher.decorateDocValueFields(...) ?
|
||||
/**
|
||||
* This method is less efficient then the 3 arg version because it may convert some fields that
|
||||
* are not needed
|
||||
*
|
||||
* @deprecated use the 3 arg version for better performance
|
||||
* @see #convertLuceneDocToSolrDoc(Document,IndexSchema,ReturnFields)
|
||||
*/
|
||||
@Deprecated
|
||||
public static SolrDocument convertLuceneDocToSolrDoc(Document doc, final IndexSchema schema) {
|
||||
SolrDocument out = new SolrDocument();
|
||||
return convertLuceneDocToSolrDoc(doc,schema, new SolrReturnFields());
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the specified <code>Document</code> into a <code>SolrDocument</code>.
|
||||
* <p>
|
||||
* The use of {@link ReturnFields} can be important even when it was already used to retrieve the
|
||||
* {@link Document} from {@link SolrDocumentFetcher} because the Document may have been cached with
|
||||
* more fields then are desired.
|
||||
* </p>
|
||||
*
|
||||
* @param doc <code>Document</code> to be converted, must not be null
|
||||
* @param schema <code>IndexSchema</code> containing the field/fieldType details for the index
|
||||
* the <code>Document</code> came from, must not be null.
|
||||
* @param fields <code>ReturnFields</code> instance that can be use to limit the set of fields
|
||||
* that will be converted, must not be null
|
||||
*/
|
||||
public static SolrDocument convertLuceneDocToSolrDoc(Document doc,
|
||||
final IndexSchema schema,
|
||||
final ReturnFields fields) {
|
||||
// TODO move to SolrDocumentFetcher ? Refactor to also call docFetcher.decorateDocValueFields(...) ?
|
||||
assert null != doc;
|
||||
assert null != schema;
|
||||
assert null != fields;
|
||||
|
||||
// can't just use fields.wantsField(String)
|
||||
// because that doesn't include extra fields needed by transformers
|
||||
final Set<String> fieldNamesNeeded = fields.getLuceneFieldNames();
|
||||
|
||||
final SolrDocument out = new SolrDocument();
|
||||
|
||||
// NOTE: it would be tempting to try and optimize this to loop over fieldNamesNeeded
|
||||
// when it's smaller then the IndexableField[] in the Document -- but that's actually *less* effecient
|
||||
// since Document.getFields(String) does a full (internal) iteration over the full IndexableField[]
|
||||
// see SOLR-11891
|
||||
for (IndexableField f : doc.getFields()) {
|
||||
final String fname = f.name();
|
||||
if (null == fieldNamesNeeded || fieldNamesNeeded.contains(fname) ) {
|
||||
// Make sure multivalued fields are represented as lists
|
||||
Object existing = out.get(f.name());
|
||||
Object existing = out.get(fname);
|
||||
if (existing == null) {
|
||||
SchemaField sf = schema.getFieldOrNull(f.name());
|
||||
SchemaField sf = schema.getFieldOrNull(fname);
|
||||
if (sf != null && sf.multiValued()) {
|
||||
List<Object> vals = new ArrayList<>();
|
||||
vals.add(f);
|
||||
out.setField(f.name(), vals);
|
||||
out.setField(fname, vals);
|
||||
} else {
|
||||
out.setField(f.name(), f);
|
||||
out.setField(fname, f);
|
||||
}
|
||||
} else {
|
||||
out.addField(f.name(), f);
|
||||
out.addField(fname, f);
|
||||
}
|
||||
}
|
||||
}
|
||||
return out;
|
||||
|
|
|
@ -152,7 +152,7 @@ public abstract class TextResponseWriter implements PushWriter {
|
|||
} else if (val instanceof Date) {
|
||||
writeDate(name, (Date) val);
|
||||
} else if (val instanceof Document) {
|
||||
SolrDocument doc = DocsStreamer.convertLuceneDocToSolrDoc((Document) val, schema);
|
||||
SolrDocument doc = DocsStreamer.convertLuceneDocToSolrDoc((Document) val, schema, returnFields);
|
||||
writeSolrDocument(name, doc, returnFields, 0);
|
||||
} else if (val instanceof SolrDocument) {
|
||||
writeSolrDocument(name, (SolrDocument) val, returnFields, 0);
|
||||
|
|
|
@ -41,6 +41,7 @@ import org.apache.solr.search.QParser;
|
|||
import org.apache.solr.search.QueryWrapperFilter;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
import org.apache.solr.search.SolrDocumentFetcher;
|
||||
import org.apache.solr.search.SolrReturnFields;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -122,6 +123,12 @@ class ChildDocTransformer extends DocTransformer {
|
|||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String[] getExtraRequestFields() {
|
||||
// we always need the idField (of the parent) in order to fill out it's children
|
||||
return new String[] { idField.getName() };
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid) {
|
||||
|
||||
|
@ -146,16 +153,16 @@ class ChildDocTransformer extends DocTransformer {
|
|||
while(i.hasNext()) {
|
||||
Integer childDocNum = i.next();
|
||||
Document childDoc = context.getSearcher().doc(childDocNum);
|
||||
SolrDocument solrChildDoc = DocsStreamer.convertLuceneDocToSolrDoc(childDoc, schema);
|
||||
|
||||
if (shouldDecorateWithDVs) {
|
||||
docFetcher.decorateDocValueFields(solrChildDoc, childDocNum, dvFieldsToReturn);
|
||||
}
|
||||
|
||||
// TODO: future enhancement...
|
||||
// support an fl local param in the transformer, which is used to build
|
||||
// a private ReturnFields instance that we use to prune unwanted field
|
||||
// names from solrChildDoc
|
||||
SolrDocument solrChildDoc = DocsStreamer.convertLuceneDocToSolrDoc(childDoc, schema,
|
||||
new SolrReturnFields());
|
||||
|
||||
if (shouldDecorateWithDVs) {
|
||||
docFetcher.decorateDocValueFields(solrChildDoc, childDocNum, dvFieldsToReturn);
|
||||
}
|
||||
doc.addChildDocument(solrChildDoc);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -83,12 +83,31 @@ public class RawValueTransformerFactory extends TransformerFactory
|
|||
return new RawTransformer( field, display );
|
||||
}
|
||||
|
||||
if(field.equals(display)) {
|
||||
return null; // nothing
|
||||
if (field.equals(display)) {
|
||||
// we have to ensure the field is returned
|
||||
return new NoopFieldTransformer(field);
|
||||
}
|
||||
return new RenameFieldTransformer( field, display, false );
|
||||
}
|
||||
|
||||
/**
|
||||
* Trivial Impl that ensure that the specified field is requested as an "extra" field,
|
||||
* but then does nothing during the transformation phase.
|
||||
*/
|
||||
private static final class NoopFieldTransformer extends DocTransformer {
|
||||
final String field;
|
||||
public NoopFieldTransformer(String field ) {
|
||||
this.field = field;
|
||||
}
|
||||
public String getName() { return "noop"; }
|
||||
public String[] getExtraRequestFields() {
|
||||
return new String[] { field };
|
||||
}
|
||||
public void transform(SolrDocument doc, int docid) {
|
||||
// No-Op
|
||||
}
|
||||
}
|
||||
|
||||
static class RawTransformer extends DocTransformer
|
||||
{
|
||||
final String field;
|
||||
|
|
|
@ -30,8 +30,13 @@ import org.apache.solr.response.transform.DocTransformer;
|
|||
public abstract class ReturnFields {
|
||||
/**
|
||||
* Set of field names with their exact names from the lucene index.
|
||||
* <p>
|
||||
* Class such as ResponseWriters pass this to {@link SolrIndexSearcher#doc(int, Set)}.
|
||||
* <p>
|
||||
* NOTE: In some situations, this method may return <code>null</code> even if {@link #wantsAllFields()}
|
||||
* is <code>false</code>. For example: When glob expressions are used ({@link #hasPatternMatching}),
|
||||
* it is safer to request all field names then to attempt to resolve the globs against all possible
|
||||
* dynamic field names in the index.
|
||||
* </p>
|
||||
* @return Set of field names or <code>null</code> (all fields).
|
||||
*/
|
||||
public abstract Set<String> getLuceneFieldNames();
|
||||
|
@ -60,10 +65,17 @@ public abstract class ReturnFields {
|
|||
*/
|
||||
public abstract Map<String,String> getFieldRenames();
|
||||
|
||||
/** Returns <code>true</code> if the specified field should be returned. */
|
||||
/**
|
||||
* Returns <code>true</code> if the specified field should be returned <em>to the external client</em>
|
||||
* -- either using it's own name, or via an alias.
|
||||
* This method returns <code>false</code> even if the specified name is needed as an "extra" field
|
||||
* for use by transformers.
|
||||
*/
|
||||
public abstract boolean wantsField(String name);
|
||||
|
||||
/** Returns <code>true</code> if all fields should be returned. */
|
||||
/**
|
||||
* Returns <code>true</code> if all fields should be returned <em>to the external client</em>.
|
||||
*/
|
||||
public abstract boolean wantsAllFields();
|
||||
|
||||
/** Returns <code>true</code> if the score should be returned. */
|
||||
|
|
|
@ -17,13 +17,25 @@
|
|||
package org.apache.solr.search;
|
||||
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.document.Document;
|
||||
import static org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.StringField;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.response.transform.*;
|
||||
import static org.apache.solr.response.DocsStreamer.convertLuceneDocToSolrDoc;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
public class ReturnFieldsTest extends SolrTestCaseJ4 {
|
||||
|
@ -363,6 +375,93 @@ public class ReturnFieldsTest extends SolrTestCaseJ4 {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* Whitebox verification that the conversion from lucene {@link Document} to {@link SolrDocument} respects
|
||||
* the {@link ReturnFields} and doesn't unneccessarily convert Fields that aren't needed.
|
||||
* <p>
|
||||
* This is important because {@link SolrDocumentFetcher} may return additional fields
|
||||
* (lazy or otherwise) if the document has been cached.
|
||||
* </p>
|
||||
*/
|
||||
public void testWhiteboxSolrDocumentConversion() {
|
||||
final IndexSchema schema = h.getCore().getLatestSchema();
|
||||
SolrDocument docOut = null;
|
||||
|
||||
// a "mock" Document with a bunch of fields...
|
||||
//
|
||||
// (we can mock this with all StringField instances because convertLuceneDocToSolrDoc only
|
||||
// uses the schema for multivalued-ness)
|
||||
final Document docIn = new Document();
|
||||
final StringBuilder allFieldNames = new StringBuilder();
|
||||
docIn.add(new StringField("id","bar",Store.YES));
|
||||
allFieldNames.append("id");
|
||||
docIn.add(new StringField("store_rpt","42",Store.YES));
|
||||
allFieldNames.append(",store_rpt");
|
||||
docIn.add(new StringField("subword","bar",Store.YES)); // single value in multi-value field
|
||||
allFieldNames.append(",subword");
|
||||
docIn.add(new StringField("uniq","xxx",Store.YES));
|
||||
docIn.add(new StringField("uniq","yyy",Store.YES)); // multi-value in multi-valued field
|
||||
allFieldNames.append(",uniq");
|
||||
for (int i = 0; i < 20; i++) {
|
||||
final String foo = "foo_" + i + "_s1";
|
||||
allFieldNames.append(",").append(foo);
|
||||
docIn.add(new StringField(foo, "bar"+i, Store.YES));
|
||||
}
|
||||
|
||||
// output should only have a single field
|
||||
docOut = convertLuceneDocToSolrDoc(docIn, schema, new SolrReturnFields(req("fl","id")));
|
||||
assertEquals(docOut.toString(), 1, docOut.size());
|
||||
assertEquals(docOut.toString(),
|
||||
Collections.singleton("id"),
|
||||
docOut.getFieldNames());
|
||||
assertTrue(docOut.toString(), docOut.get("id") instanceof StringField);
|
||||
|
||||
// output should only have the few specified fields
|
||||
// behavior should be ultimately be consistent for all of these ReturnField instances
|
||||
// (aliasing, extra requested by transformer, or otherwise)
|
||||
for (ReturnFields rf : Arrays.asList
|
||||
(new SolrReturnFields(req("fl","id,subword,store_rpt,uniq,foo_2_s1")),
|
||||
new SolrReturnFields(req("fl","id,xxx:[geo f=store_rpt],uniq,foo_2_s1,subword")),
|
||||
new SolrReturnFields(req("fl","id,xxx:subword,uniq,yyy:foo_2_s1,[geo f=store_rpt]")))) {
|
||||
docOut = convertLuceneDocToSolrDoc(docIn, schema, rf);
|
||||
final String debug = rf.toString() + " => " +docOut.toString();
|
||||
assertEquals(debug, 5, docOut.size());
|
||||
assertEquals(debug,
|
||||
new HashSet<String>(Arrays.asList("id","subword","uniq","foo_2_s1","store_rpt")),
|
||||
docOut.getFieldNames());
|
||||
assertTrue(debug, docOut.get("id") instanceof StringField);
|
||||
assertTrue(debug, docOut.get("store_rpt") instanceof StringField);
|
||||
assertTrue(debug, docOut.get("foo_2_s1") instanceof StringField);
|
||||
assertTrue(debug, docOut.get("subword") instanceof List);
|
||||
assertTrue(debug, docOut.get("uniq") instanceof List);
|
||||
}
|
||||
|
||||
// all source fields should be in the output
|
||||
// behavior should be ultimately be consistent for all of these ReturnField instances
|
||||
// (globbing or requesting more fields then doc has)
|
||||
for (ReturnFields rf : Arrays.asList
|
||||
(new SolrReturnFields(),
|
||||
new SolrReturnFields(req()),
|
||||
new SolrReturnFields(req("fl","*")),
|
||||
new SolrReturnFields(req("fl","*,score")),
|
||||
new SolrReturnFields(req("fl","id,subword,uniq,foo_*,store_*")),
|
||||
new SolrReturnFields(req("fl",allFieldNames+",bogus1,bogus2,bogus3")))) {
|
||||
|
||||
docOut = convertLuceneDocToSolrDoc(docIn, schema, rf);
|
||||
final String debug = rf.toString() + " => " +docOut.toString();
|
||||
assertEquals(debug, 24, docOut.size());
|
||||
assertTrue(debug, docOut.get("id") instanceof StringField);
|
||||
assertTrue(debug, docOut.get("store_rpt") instanceof StringField);
|
||||
assertTrue(debug, docOut.get("subword") instanceof List);
|
||||
assertTrue(debug, docOut.get("uniq") instanceof List);
|
||||
for (int i = 0; i < 20; i++) {
|
||||
assertTrue(debug, docOut.get("foo_" + i + "_s1") instanceof StringField);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void testWhitespace() {
|
||||
Random r = random();
|
||||
final int iters = atLeast(30);
|
||||
|
|
Loading…
Reference in New Issue