SOLR-6259: Reduce CPU usage by avoiding repeated costly calls to Document.getField inside DocumentBuilder.toDocument for use-cases with large number of fields and copyFields

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1611852 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shalin Shekhar Mangar 2014-07-19 07:10:36 +00:00
parent a75792bb4c
commit c8261a41d3
2 changed files with 49 additions and 34 deletions

View File

@ -204,6 +204,10 @@ Optimizations
indexes with many fields of same type just use one TokenStream per thread.
(Shay Banon, Uwe Schindler, Robert Muir)
* SOLR-6259: Reduce CPU usage by avoiding repeated costly calls to Document.getField inside
DocumentBuilder.toDocument for use-cases with large number of fields and copyFields.
(Steven Bower via shalin)
Other Changes
---------------------

View File

@ -18,6 +18,7 @@
package org.apache.solr.update;
import java.util.List;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -30,6 +31,8 @@ import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import com.google.common.collect.Sets;
/**
*
*/
@ -75,6 +78,7 @@ public class DocumentBuilder {
{
Document out = new Document();
final float docBoost = doc.getDocumentBoost();
Set<String> usedFields = Sets.newHashSet();
// Load fields from SolrDocument to Document
for( SolrInputField field : doc ) {
@ -103,6 +107,9 @@ public class DocumentBuilder {
// it ourselves
float compoundBoost = fieldBoost * docBoost;
List<CopyField> copyFields = schema.getCopyFieldsList(name);
if( copyFields.size() == 0 ) copyFields = null;
// load each field value
boolean hasField = false;
try {
@ -114,48 +121,52 @@ public class DocumentBuilder {
if (sfield != null) {
used = true;
addField(out, sfield, v, applyBoost ? compoundBoost : 1f);
// record the field as having a value
usedFields.add(sfield.getName());
}
// Check if we should copy this field value to any other fields.
// This could happen whether it is explicit or not.
List<CopyField> copyFields = schema.getCopyFieldsList(name);
for (CopyField cf : copyFields) {
SchemaField destinationField = cf.getDestination();
final boolean destHasValues =
(null != out.getField(destinationField.getName()));
// check if the copy field is a multivalued or not
if (!destinationField.multiValued() && destHasValues) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
destinationField.getName() + ": " + v);
}
if( copyFields != null ){
for (CopyField cf : copyFields) {
SchemaField destinationField = cf.getDestination();
used = true;
// Perhaps trim the length of a copy field
Object val = v;
if( val instanceof String && cf.getMaxChars() > 0 ) {
val = cf.getLimitedValue((String)val);
final boolean destHasValues = usedFields.contains(destinationField.getName());
// check if the copy field is a multivalued or not
if (!destinationField.multiValued() && destHasValues) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
destinationField.getName() + ": " + v);
}
used = true;
// Perhaps trim the length of a copy field
Object val = v;
if( val instanceof String && cf.getMaxChars() > 0 ) {
val = cf.getLimitedValue((String)val);
}
// we can't copy any boost unless the dest field is
// indexed & !omitNorms, but which boost we copy depends
// on whether the dest field already contains values (we
// don't want to apply the compounded docBoost more then once)
final float destBoost =
(destinationField.indexed() && !destinationField.omitNorms()) ?
(destHasValues ? fieldBoost : compoundBoost) : 1.0F;
addField(out, destinationField, val, destBoost);
// record the field as having a value
usedFields.add(destinationField.getName());
}
// we can't copy any boost unless the dest field is
// indexed & !omitNorms, but which boost we copy depends
// on whether the dest field already contains values (we
// don't want to apply the compounded docBoost more then once)
final float destBoost =
(destinationField.indexed() && !destinationField.omitNorms()) ?
(destHasValues ? fieldBoost : compoundBoost) : 1.0F;
addField(out, destinationField, val, destBoost);
// The final boost for a given field named is the product of the
// *all* boosts on values of that field.
// For multi-valued fields, we only want to set the boost on the
// first field.
fieldBoost = compoundBoost = 1.0f;
}
// The final boost for a given field named is the product of the
// *all* boosts on values of that field.
// For multi-valued fields, we only want to set the boost on the
// first field.
fieldBoost = compoundBoost = 1.0f;
}
}
catch( SolrException ex ) {