From c8261a41d30e3966159e3bb9d4d23df0f8e3cf63 Mon Sep 17 00:00:00 2001 From: Shalin Shekhar Mangar Date: Sat, 19 Jul 2014 07:10:36 +0000 Subject: [PATCH] SOLR-6259: Reduce CPU usage by avoiding repeated costly calls to Document.getField inside DocumentBuilder.toDocument for use-cases with large number of fields and copyFields git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1611852 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 4 + .../apache/solr/update/DocumentBuilder.java | 79 +++++++++++-------- 2 files changed, 49 insertions(+), 34 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index a0f4bde4098..6dfb65359a1 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -204,6 +204,10 @@ Optimizations indexes with many fields of same type just use one TokenStream per thread. (Shay Banon, Uwe Schindler, Robert Muir) +* SOLR-6259: Reduce CPU usage by avoiding repeated costly calls to Document.getField inside + DocumentBuilder.toDocument for use-cases with large number of fields and copyFields. + (Steven Bower via shalin) + Other Changes --------------------- diff --git a/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java b/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java index 9c9d7f72ddb..6b58f6d5fda 100644 --- a/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java +++ b/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java @@ -18,6 +18,7 @@ package org.apache.solr.update; import java.util.List; +import java.util.Set; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -30,6 +31,8 @@ import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; +import com.google.common.collect.Sets; + /** * */ @@ -75,6 +78,7 @@ public class DocumentBuilder { { Document out = new Document(); final float docBoost = doc.getDocumentBoost(); + Set usedFields = Sets.newHashSet(); // Load fields from SolrDocument to Document for( SolrInputField field : doc ) { @@ -103,6 +107,9 @@ public class DocumentBuilder { // it ourselves float compoundBoost = fieldBoost * docBoost; + List copyFields = schema.getCopyFieldsList(name); + if( copyFields.size() == 0 ) copyFields = null; + // load each field value boolean hasField = false; try { @@ -114,48 +121,52 @@ public class DocumentBuilder { if (sfield != null) { used = true; addField(out, sfield, v, applyBoost ? compoundBoost : 1f); + // record the field as having a value + usedFields.add(sfield.getName()); } // Check if we should copy this field value to any other fields. // This could happen whether it is explicit or not. - List copyFields = schema.getCopyFieldsList(name); - for (CopyField cf : copyFields) { - SchemaField destinationField = cf.getDestination(); - - final boolean destHasValues = - (null != out.getField(destinationField.getName())); - - // check if the copy field is a multivalued or not - if (!destinationField.multiValued() && destHasValues) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, - "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " + - destinationField.getName() + ": " + v); - } + if( copyFields != null ){ + for (CopyField cf : copyFields) { + SchemaField destinationField = cf.getDestination(); - used = true; - - // Perhaps trim the length of a copy field - Object val = v; - if( val instanceof String && cf.getMaxChars() > 0 ) { - val = cf.getLimitedValue((String)val); + final boolean destHasValues = usedFields.contains(destinationField.getName()); + + // check if the copy field is a multivalued or not + if (!destinationField.multiValued() && destHasValues) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " + + destinationField.getName() + ": " + v); + } + + used = true; + + // Perhaps trim the length of a copy field + Object val = v; + if( val instanceof String && cf.getMaxChars() > 0 ) { + val = cf.getLimitedValue((String)val); + } + + // we can't copy any boost unless the dest field is + // indexed & !omitNorms, but which boost we copy depends + // on whether the dest field already contains values (we + // don't want to apply the compounded docBoost more then once) + final float destBoost = + (destinationField.indexed() && !destinationField.omitNorms()) ? + (destHasValues ? fieldBoost : compoundBoost) : 1.0F; + + addField(out, destinationField, val, destBoost); + // record the field as having a value + usedFields.add(destinationField.getName()); } - - // we can't copy any boost unless the dest field is - // indexed & !omitNorms, but which boost we copy depends - // on whether the dest field already contains values (we - // don't want to apply the compounded docBoost more then once) - final float destBoost = - (destinationField.indexed() && !destinationField.omitNorms()) ? - (destHasValues ? fieldBoost : compoundBoost) : 1.0F; - addField(out, destinationField, val, destBoost); + // The final boost for a given field named is the product of the + // *all* boosts on values of that field. + // For multi-valued fields, we only want to set the boost on the + // first field. + fieldBoost = compoundBoost = 1.0f; } - - // The final boost for a given field named is the product of the - // *all* boosts on values of that field. - // For multi-valued fields, we only want to set the boost on the - // first field. - fieldBoost = compoundBoost = 1.0f; } } catch( SolrException ex ) {