diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 19dd8a1fd27..8f9bc007c76 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -207,6 +207,21 @@ New Features * SOLR-1726: Added deep paging support to search (sort by score only) which should use less memory when paging deeply into results by keeping the priority queue small. (Manojkumar Rangasamy Kannadasan, gsingers) +* SOLR-2802: New FieldMutatingUpdateProcessor and Factory to simplify the + development of UpdateProcessors that modify field values of documents as + they are indexed. Also includes several useful new implementations: + RemoveBlankFieldUpdateProcessorFactory + TrimFieldUpdateProcessorFactory + HTMLStripFieldUpdateProcessorFactory + RegexReplaceProcessorFactory + FieldLengthUpdateProcessorFactory + ConcatFieldUpdateProcessorFactory + FirstFieldValueUpdateProcessorFactory + LastFieldValueUpdateProcessorFactory + MinFieldValueUpdateProcessorFactory + MaxFieldValueUpdateProcessorFactory + (hossman, janhoy) + Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/update/processor/ConcatFieldUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ConcatFieldUpdateProcessorFactory.java new file mode 100644 index 00000000000..26aca06416b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/ConcatFieldUpdateProcessorFactory.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.core.SolrCore; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.TextField; +import org.apache.solr.schema.StrField; + +import org.apache.solr.common.SolrInputField; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; + +import org.apache.commons.lang.StringUtils; + +/** + * Concatenates multiple values for fields matching the specified + * conditions using a configurable delimiter which defaults + * to ", ". + * 

+ * By default, this processor concatenates the values for any field name + * which according to the schema is multiValued="false" + * and uses TextField or StrField + *

+ * + *

+ * For example, in the configuration below, any "single valued" string and + * text field which is found to contain multiple values except for + * the primary_author field will be concatenated using the + * string "; " as a delimiter. For the + * primary_author field, the multiple values will be left + * alone for FirstFieldValueUpdateProcessorFactory to deal with. + * 

+ * + *
+ *  <updateRequestProcessorChain>
+ *    <processor class="solr.ConcatFieldUpdateProcessorFactory">
+ *      <str name="delimiter">; </str>
+ *      <lst name="exclude">
+ *        <str name="fieldName">primary_author</str>
+ *      </lst>
+ *    </processor>
+ *    <processor class="solr.FirstFieldValueUpdateProcessorFactory">
+ *      <str name="fieldName">primary_author</str>
+ *    </processor>
+ *  </updateRequestProcessorChain>
+ * 
+ */ +public final class ConcatFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory { + + String delimiter = ", "; + + @SuppressWarnings("unchecked") + @Override + public void init(NamedList args) { + Object d = args.remove("delimiter"); + if (null != d) delimiter = d.toString(); + + super.init(args); + } + + @Override + public UpdateRequestProcessor getInstance(SolrQueryRequest req, + SolrQueryResponse rsp, + UpdateRequestProcessor next) { + return new FieldMutatingUpdateProcessor(getSelector(), next) { + protected SolrInputField mutate(final SolrInputField src) { + if (src.getValueCount() <= 1) return src; + + SolrInputField result = new SolrInputField(src.getName()); + result.setValue(StringUtils.join(src.getValues(), delimiter), + src.getBoost()); + return result; + } + }; + } + + @Override + public FieldMutatingUpdateProcessor.FieldNameSelector + getDefaultSelector(final SolrCore core) { + + final IndexSchema schema = core.getSchema(); + return new FieldMutatingUpdateProcessor.FieldNameSelector() { + public boolean shouldMutate(final String fieldName) { + + // first check type since it should be fastest + FieldType type = schema.getFieldTypeNoEx(fieldName); + if (null == type) return false; + + if (! (TextField.class.isInstance(type) + || StrField.class.isInstance(type))) { + return false; + } + + // only ask for SchemaField if we passed the type check. + SchemaField sf = schema.getFieldOrNull(fieldName); + // shouldn't be null since since type wasn't, but just in case + if (null == sf) return false; + + return ! 
sf.multiValued(); + } + }; + } + +} + diff --git a/solr/core/src/java/org/apache/solr/update/processor/FieldLengthUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/FieldLengthUpdateProcessorFactory.java new file mode 100644 index 00000000000..8ffa6fef84b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/FieldLengthUpdateProcessorFactory.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.core.SolrCore; + + +/** + * Replaces any CharSequence values found in fields matching the specified + * conditions with the lengths of those CharSequences (as an Integer). + *

+ * By default, this processor matches no fields. + *

+ *

For example, with the configuration listed below any documents + * containing String values (such as "abcdef" or + * "xyz") in a field declared in the schema using + * TrieIntField or TrieLongField + * would have those Strings replaced with the length of those fields as an + * Integer + * (ie: 6 and 3 respectively) + *

+ *
+ * <processor class="solr.FieldLengthUpdateProcessorFactory">
+ *   <arr name="typeClass">
+ *     <str>solr.TrieIntField</str>
+ *     <str>solr.TrieLongField</str>
+ *   </arr>
+ * </processor>
+ * 
+ */ +public final class FieldLengthUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory { + + @SuppressWarnings("unchecked") + @Override + public void init(NamedList args) { + // no length specific init args + super.init(args); + } + + @Override + public FieldMutatingUpdateProcessor.FieldNameSelector + getDefaultSelector(final SolrCore core) { + + return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS; + + } + + @Override + public UpdateRequestProcessor getInstance(SolrQueryRequest req, + SolrQueryResponse rsp, + UpdateRequestProcessor next) { + return new FieldValueMutatingUpdateProcessor(getSelector(), next) { + protected Object mutateValue(final Object src) { + if (src instanceof CharSequence) { + return new Integer(((CharSequence)src).length()); + } + return src; + } + }; + } +} + diff --git a/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessor.java new file mode 100644 index 00000000000..87f22016ada --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessor.java @@ -0,0 +1,283 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.update.processor; + +import java.io.IOException; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import static org.apache.solr.common.SolrException.ErrorCode.*; + +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; +import org.apache.solr.common.SolrException; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.FieldType; + +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.update.AddUpdateCommand; + +/** + * Reusable base class for UpdateProcessors that will consider + * AddUpdateCommands and mutate the values associated with configured + * fields. + * 

+ * Subclasses should override the mutate method to specify how individual + * SolrInputFields identified by the selector associated with this instance + * will be mutated. + *

+ * + * @see FieldMutatingUpdateProcessorFactory + * @see FieldValueMutatingUpdateProcessor + * @see FieldNameSelector + */ +public abstract class FieldMutatingUpdateProcessor + extends UpdateRequestProcessor { + + private final FieldNameSelector selector; + public FieldMutatingUpdateProcessor(FieldNameSelector selector, + UpdateRequestProcessor next) { + super(next); + this.selector = selector; + } + + /** + * Method for mutating SolrInputFields associated with fields identified + * by the FieldNameSelector associated with this processor + * @param src the SolrInputField to mutate, may be modified in place and + * returned + * @return the SolrInputField to use in replacing the original (src) value. + * If null the field will be removed. + */ + protected abstract SolrInputField mutate(final SolrInputField src); + + @Override + public void processAdd(AddUpdateCommand cmd) throws IOException { + final SolrInputDocument doc = cmd.getSolrInputDocument(); + + // make a copy we can iterate over while mutating the doc + final Collection fieldNames + = new ArrayList(doc.getFieldNames()); + + for (final String fname : fieldNames) { + + if (! selector.shouldMutate(fname)) continue; + + final SolrInputField src = doc.get(fname); + final SolrInputField dest = mutate(src); + if (null == dest) { + doc.remove(fname); + } else { + // semantics of what happens if dest has diff name are hard + // we could treat it as a copy, or a rename + // for now, don't allow it. + if (! 
fname.equals(dest.getName()) ) { + throw new SolrException(SERVER_ERROR, + "mutute returned field with different name: " + + fname + " => " + dest.getName()); + } + doc.put(dest.getName(), dest); + } + } + super.processAdd(cmd); + } + + /** + * Interface for idenfifying which fileds should be mutated + */ + public static interface FieldNameSelector { + public boolean shouldMutate(final String fieldName); + } + + /** Singleton indicating all fields should be mutated */ + public static final FieldNameSelector SELECT_ALL_FIELDS + = new FieldNameSelector() { + public boolean shouldMutate(final String fieldName) { + return true; + } + }; + + /** Singleton indicating no fields should be mutated */ + public static final FieldNameSelector SELECT_NO_FIELDS + = new FieldNameSelector() { + public boolean shouldMutate(final String fieldName) { + return false; + } + }; + + /** + * Wraps two FieldNameSelectors such that the FieldNameSelector + * returned matches all fields specified by the "includes" unless they + * are matched by "excludes" + * @param includes a selector identifying field names that should be selected + * @param excludes a selector identifying field names that should be + * not be selected, even if they are matched by the 'includes' + * selector + * @return Either a new FieldNameSelector or one of the input selecors + * if the combination lends itself to optimization. + */ + public static FieldNameSelector wrap(final FieldNameSelector includes, + final FieldNameSelector excludes) { + + if (SELECT_NO_FIELDS == excludes) { + return includes; + } + + if (SELECT_ALL_FIELDS == excludes) { + return SELECT_NO_FIELDS; + } + + if (SELECT_ALL_FIELDS == includes) { + return new FieldNameSelector() { + public boolean shouldMutate(final String fieldName) { + return ! excludes.shouldMutate(fieldName); + } + }; + } + + return new FieldNameSelector() { + public boolean shouldMutate(final String fieldName) { + return (includes.shouldMutate(fieldName) + && ! 
excludes.shouldMutate(fieldName)); + } + }; + } + + /** + * Utility method that can be used to define a FieldNameSelector + * using the same types of rules as the FieldMutatingUpdateProcessor init + * code. This may be useful for Factories that wish to define default + * selectors in similar terms to what the configuration would look like. + * @lucene.internal + */ + public static FieldNameSelector createFieldNameSelector + (final SolrResourceLoader loader, + final IndexSchema schema, + final Set fields, + final Set typeNames, + final Collection typeClasses, + final Collection regexes, + final FieldNameSelector defSelector) { + + final Collection classes + = new ArrayList(typeClasses.size()); + + for (String t : typeClasses) { + try { + classes.add(loader.findClass(t)); + } catch (Exception e) { + throw new SolrException(SERVER_ERROR, + "Can't resolve typeClass: " + t, e); + } + } + + if (classes.isEmpty() && + typeNames.isEmpty() && + regexes.isEmpty() && + fields.isEmpty()) { + return defSelector; + } + + return new ConfigurableFieldNameSelector + (schema, fields, typeNames, classes, regexes); + } + + private static final class ConfigurableFieldNameSelector + implements FieldNameSelector { + + final IndexSchema schema; + final Set fields; + final Set typeNames; + final Collection classes; + final Collection regexes; + + private ConfigurableFieldNameSelector(final IndexSchema schema, + final Set fields, + final Set typeNames, + final Collection classes, + final Collection regexes) { + this.schema = schema; + this.fields = fields; + this.typeNames = typeNames; + this.classes = classes; + this.regexes = regexes; + } + + public boolean shouldMutate(final String fieldName) { + + // order of checks is bsaed on what should be quicker + // (ie: set lookups faster the looping over instanceOf / matches tests + + if ( ! 
(fields.isEmpty() || fields.contains(fieldName)) ) { + return false; + } + + // do not consider it an error if the fieldName has no type + // there might be another processor dealing with it later + FieldType t = schema.getFieldTypeNoEx(fieldName); + if (null != t) { + if (! (typeNames.isEmpty() || typeNames.contains(t.getTypeName())) ) { + return false; + } + + if (! (classes.isEmpty() || instanceOfAny(t, classes)) ) { + return false; + } + } + + if (! (regexes.isEmpty() || matchesAny(fieldName, regexes)) ) { + return false; + } + + return true; + } + + /** + * returns true if the Object 'o' is an instance of any class in + * the Collection + */ + private static boolean instanceOfAny(Object o, Collection classes) { + for (Class c : classes) { + if ( c.isInstance(o) ) return true; + } + return false; + } + + /** + * returns true if the CharSequence 's' matches any Pattern in the + * Collection + */ + private static boolean matchesAny(CharSequence s, + Collection regexes) { + for (Pattern p : regexes) { + if (p.matcher(s).matches()) return true; + } + return false; + } + } +} + diff --git a/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java new file mode 100644 index 00000000000..2e3a1cb4681 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java @@ -0,0 +1,284 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import java.io.IOException; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import org.apache.solr.core.SolrCore; +import org.apache.solr.common.SolrException; +import static org.apache.solr.common.SolrException.ErrorCode.*; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.update.AddUpdateCommand; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.FieldType; +import org.apache.solr.util.plugin.SolrCoreAware; + + +/** + * Base class for implementing Factories for FieldMutatingUpdateProcessors and + * FieldValueMutatingUpdateProcessors. + * + *

+ * This class provides all of the plumbing for configuring the + * FieldNameSelector using the following init params to specify selection + * criteria... + * 

+ *
    + *
  • fieldName - selecting specific fields by field name lookup
  • + *
  • fieldRegex - selecting specific fields by field name regex match (regexes are checked in the order specified)
  • + *
  • typeName - selecting specific fields by fieldType name lookup
  • + *
  • typeClass - selecting specific fields by fieldType class lookup, including inheritance and interfaces
  • + *
+ * + *

+ * Each criteria can be specified as either an <arr> of <str>, or + * multiple <str> with the same name. When multiple criteria of a + * single type exist, fields must match at least one to be selected. + * If more than one type of criteria exist, fields must match + * at least one of each to be selected. + * 

+ *

+ * One or more exclude <lst> params may also be specified, + * containing any of the above criteria, identifying fields to be excluded + * from selection even if they match the selection criteria. As with the main + * selection criteria a field must match all of the criteria in a single exclusion + * in order to be excluded, but multiple exclusions may be specified to get an + * OR behavior + * 

+ * + *

+ * In the ExampleFieldMutatingUpdateProcessorFactory configured below, + * fields will be mutated if the name starts with "foo" or "bar"; + * unless the field name contains the substring "SKIP" or + * the fieldType is (or subclasses) DateField. Meaning a field named + * "foo_SKIP" is guaranteed not to be selected, but a field named "bar_smith" + * that uses StrField will be selected. + * 

+ *
+ * <processor class="solr.ExampleFieldMutatingUpdateProcessorFactory">
+ *   <str name="fieldRegex">foo.*</str>
+ *   <str name="fieldRegex">bar.*</str>
+ *   <!-- each set of exclusions is checked independently -->
+ *   <lst name="exclude">
+ *     <str name="fieldRegex">.*SKIP.*</str>
+ *   </lst>
+ *   <lst name="exclude">
+ *     <str name="typeClass">solr.DateField</str>
+ *   </lst>
+ * </processor>
+ * 
+ * + *

+ * Subclasses define the default selection behavior to be applied if no + * criteria is configured by the user. User configured "exclude" criteria + * will be applied to the subclass defined default selector. + *

+ * + * @see FieldMutatingUpdateProcessor + * @see FieldValueMutatingUpdateProcessor + * @see FieldMutatingUpdateProcessor.FieldNameSelector + */ +public abstract class FieldMutatingUpdateProcessorFactory + extends UpdateRequestProcessorFactory + implements SolrCoreAware { + + private static class SelectorParams { + public Set fieldName = Collections.emptySet(); + public Set typeName = Collections.emptySet(); + public Collection typeClass = Collections.emptyList(); + public Collection fieldRegex = Collections.emptyList(); + } + + private SelectorParams inclusions = new SelectorParams(); + private Collection exclusions + = new ArrayList(); + + private FieldMutatingUpdateProcessor.FieldNameSelector selector = null; + + protected final FieldMutatingUpdateProcessor.FieldNameSelector getSelector() { + if (null != selector) return selector; + + throw new SolrException(SERVER_ERROR, "selector was never initialized, "+ + " inform(SolrCore) never called???"); + } + + @SuppressWarnings("unchecked") + private static final SelectorParams parseSelectorParams(NamedList args) { + SelectorParams params = new SelectorParams(); + + params.fieldName = new HashSet(oneOrMany(args, "fieldName")); + params.typeName = new HashSet(oneOrMany(args, "typeName")); + + // we can compile the patterns now + Collection patterns = oneOrMany(args, "fieldRegex"); + if (! patterns.isEmpty()) { + params.fieldRegex = new ArrayList(patterns.size()); + for (String s : patterns) { + try { + params.fieldRegex.add(Pattern.compile(s)); + } catch (PatternSyntaxException e) { + throw new SolrException + (SERVER_ERROR, "Invalid 'fieldRegex' pattern: " + s, e); + } + } + } + + // resolve this into actual Class objects later + params.typeClass = oneOrMany(args, "typeClass"); + + return params; + } + + + /** + * Handles common initialization related to source fields for + * constructoring the FieldNameSelector to be used. 
+ * + * Will error if any unexpected init args are found, so subclasses should + * remove any subclass-specific init args before calling this method. + */ + @SuppressWarnings("unchecked") + @Override + public void init(NamedList args) { + + inclusions = parseSelectorParams(args); + + List excList = args.getAll("exclude"); + for (Object excObj : excList) { + if (null == excObj) { + throw new SolrException + (SERVER_ERROR, "'exclude' init param can not be null"); + } + if (! (excObj instanceof NamedList) ) { + throw new SolrException + (SERVER_ERROR, "'exclude' init param must be "); + } + NamedList exc = (NamedList) excObj; + exclusions.add(parseSelectorParams(exc)); + if (0 < exc.size()) { + throw new SolrException(SERVER_ERROR, + "Unexpected 'exclude' init sub-param(s): '" + + args.getName(0) + "'"); + } + // call once per instance + args.remove("exclude"); + } + if (0 < args.size()) { + throw new SolrException(SERVER_ERROR, + "Unexpected init param(s): '" + + args.getName(0) + "'"); + } + + } + + public void inform(final SolrCore core) { + + final IndexSchema schema = core.getSchema(); + + selector = + FieldMutatingUpdateProcessor.createFieldNameSelector + (core.getResourceLoader(), + core.getSchema(), + inclusions.fieldName, + inclusions.typeName, + inclusions.typeClass, + inclusions.fieldRegex, + getDefaultSelector(core)); + + for (SelectorParams exc : exclusions) { + selector = FieldMutatingUpdateProcessor.wrap + (selector, + FieldMutatingUpdateProcessor.createFieldNameSelector + (core.getResourceLoader(), + core.getSchema(), + exc.fieldName, + exc.typeName, + exc.typeClass, + exc.fieldRegex, + FieldMutatingUpdateProcessor.SELECT_NO_FIELDS)); + } + } + + /** + * Defines the default selection behavior when the user has not + * configured any specific criteria for selecting fields. The Default + * implementation matches all fields, and should be overridden by subclasses + * as needed. 
+ * + * @see FieldMutatingUpdateProcessor#SELECT_ALL_FIELDS + */ + protected FieldMutatingUpdateProcessor.FieldNameSelector + getDefaultSelector(final SolrCore core) { + + return FieldMutatingUpdateProcessor.SELECT_ALL_FIELDS; + + } + + /** + * Removes all instance of the key from NamedList, returning the Set of + * Strings that key refered to. Throws an error if the key didn't refer + * to one or more strings (or arrays of strings) + * @exception SolrException invalid arr/str structure. + */ + private static Collection oneOrMany(final NamedList args, final String key) { + List result = new ArrayList(args.size() / 2); + final String err = "init arg '" + key + "' must be a string " + + "(ie: 'str'), or an array (ie: 'arr') containing strings; found: "; + + for (Object o = args.remove(key); null != o; o = args.remove(key)) { + if (o instanceof String) { + result.add((String)o); + continue; + } + + if (o instanceof Object[]) { + o = Arrays.asList((Object[]) o); + } + + if (o instanceof Collection) { + for (Object item : (Collection)o) { + if (! (item instanceof String)) { + throw new SolrException(SERVER_ERROR, err + item.getClass()); + } + result.add((String)item); + } + continue; + } + + // who knows what the hell we have + throw new SolrException(SERVER_ERROR, err + o.getClass()); + } + + return result; + } + +} + + + diff --git a/solr/core/src/java/org/apache/solr/update/processor/FieldValueMutatingUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/FieldValueMutatingUpdateProcessor.java new file mode 100644 index 00000000000..2782559b8e9 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/FieldValueMutatingUpdateProcessor.java @@ -0,0 +1,81 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.common.SolrInputField; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Abstract subclass of FieldMutatingUpdateProcessor for implementing + * UpdateProcessors that will mutate all individual values of a selected + * field independently + * + * @see FieldMutatingUpdateProcessorFactory + */ +public abstract class FieldValueMutatingUpdateProcessor + extends FieldMutatingUpdateProcessor { + + private static final Logger log = LoggerFactory.getLogger(FieldValueMutatingUpdateProcessor.class); + + + public static final Object DELETE_VALUE_SINGLETON = new Object() { + public String toString() { + return "!!Singleton Object Triggering Value Deletion!!"; + } + }; + + public FieldValueMutatingUpdateProcessor(FieldNameSelector selector, + UpdateRequestProcessor next) { + super(selector, next); + } + + /** + * Mutates individual values of a field as needed, or returns the original + * value. + * + * @param src a value from a matched field which should be mutated + * @return the value to use as a replacement for src, or + * DELETE_VALUE_SINGLETON to indicate that the value + * should be removed completely. 
+ * @see #DELETE_VALUE_SINGLETON + */ + protected abstract Object mutateValue(final Object src); + + protected final SolrInputField mutate(final SolrInputField src) { + SolrInputField result = new SolrInputField(src.getName()); + for (final Object srcVal : src.getValues()) { + final Object destVal = mutateValue(srcVal); + if (DELETE_VALUE_SINGLETON == destVal) { + /* NOOP */ + log.debug("removing value from field '{}': {}", + src.getName(), srcVal); + } else { + if (destVal != srcVal) { + log.debug("replace value from field '{}': {} with {}", + new Object[] { src.getName(), srcVal, destVal }); + } + result.addValue(destVal, 1.0F); + } + } + result.setBoost(src.getBoost()); + return 0 == result.getValueCount() ? null : result; + } +} + diff --git a/solr/core/src/java/org/apache/solr/update/processor/FieldValueSubsetUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/FieldValueSubsetUpdateProcessorFactory.java new file mode 100644 index 00000000000..b5eb0864743 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/FieldValueSubsetUpdateProcessorFactory.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.update.processor; + +import org.apache.solr.core.SolrCore; + +import org.apache.solr.common.SolrInputField; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; + +import java.util.Collection; + +/** + * Base class for processors that want to mutate selected fields to only + * keep a subset of the original values. + * @see #pickSubset + */ +public abstract class FieldValueSubsetUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory { + + @Override + public final UpdateRequestProcessor getInstance(SolrQueryRequest req, + SolrQueryResponse rsp, + UpdateRequestProcessor next) { + return new FieldMutatingUpdateProcessor(getSelector(), next) { + protected SolrInputField mutate(final SolrInputField src) { + if (src.getValueCount() <= 1) return src; + + SolrInputField result = new SolrInputField(src.getName()); + result.setValue(pickSubset(src.getValues()), + src.getBoost()); + return result; + } + }; + } + + /** + * Method subclasses must override to specify which values should be kept. + * This method will not be called unless the collection contains more then + * one value. + */ + protected abstract Collection pickSubset(Collection values); + +} + diff --git a/solr/core/src/java/org/apache/solr/update/processor/FirstFieldValueUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/FirstFieldValueUpdateProcessorFactory.java new file mode 100644 index 00000000000..5243f73dbe3 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/FirstFieldValueUpdateProcessorFactory.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.core.SolrCore; + +import java.util.Collections; +import java.util.Collection; +import java.util.Iterator; + +/** + * Keeps only the first value of fields matching the specified + * conditions. Correct behavior assumes that the SolrInputFields being mutated + * are either single valued, or use an ordered Collection (ie: not a Set). + *

+ * By default, this processor matches no fields. + *

+ * + *

+ * For example, in the configuration below, if a field named + * primary_author contained multiple values (ie: + * "Adam Doe", "Bob Smith", "Carla Jones") then only the first + * value (ie: "Adam Doe") will be kept + *

+ * + *
+ * <processor class="solr.FirstFieldValueUpdateProcessorFactory">
+ *   <str name="fieldName">primary_author</str>
+ * </processor>
+ * 
+ *
+ * @see LastFieldValueUpdateProcessorFactory
+ */
+public final class FirstFieldValueUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {
+
+  /**
+   * Keeps whichever value the collection's iterator produces first.
+   *
+   * @param values the candidate values
+   * @return a single-element list holding the first iterated value
+   */
+  @Override
+  public Collection pickSubset(Collection values) {
+    // iteration order is taken at face value
+    final Iterator remaining = values.iterator();
+    final Object first = remaining.next();
+    return Collections.singletonList(first);
+  }
+
+  /**
+   * Default selector for this factory: matches no fields.
+   */
+  @Override
+  public FieldMutatingUpdateProcessor.FieldNameSelector getDefaultSelector(final SolrCore core) {
+    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
+  }
+
+}
+
diff --git a/solr/core/src/java/org/apache/solr/update/processor/HTMLStripFieldUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/HTMLStripFieldUpdateProcessorFactory.java
new file mode 100644
index 00000000000..d0dcb80ec47
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/update/processor/HTMLStripFieldUpdateProcessorFactory.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */ + +package org.apache.solr.update.processor; + +import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; + +import org.apache.lucene.analysis.CharReader; +import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; + +import org.apache.commons.io.IOUtils; +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; +import java.io.StringWriter; + +/** + * Strips all HTML Markup in any CharSequence values + * found in fields matching the specified conditions. + *

+ * By default this processor matches no fields + *

+ * + *

For example, with the configuration listed below, any documents + * containing HTML markup in any field declared in the schema using + * StrField will have that HTML stripped away. + * 

+ *
+ * <processor class="solr.HTMLStripFieldUpdateProcessorFactory">
+ *   <str name="typeClass">solr.StrField</str>
+ * </processor>
+ * 
+ */
+public final class HTMLStripFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {
+
+  /**
+   * Default selector for this factory: matches no fields.
+   */
+  @Override
+  public FieldMutatingUpdateProcessor.FieldNameSelector getDefaultSelector(final SolrCore core) {
+    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
+  }
+
+  /**
+   * Creates a processor that runs every CharSequence value of the selected
+   * fields through an HTMLStripCharFilter and keeps the filtered text.
+   * Values of any other type are passed through unchanged.
+   */
+  @Override
+  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
+                                            SolrQueryResponse rsp,
+                                            UpdateRequestProcessor next) {
+    return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
+      protected Object mutateValue(final Object src) {
+        if (!(src instanceof CharSequence)) {
+          return src;
+        }
+
+        final CharSequence markup = (CharSequence) src;
+        final StringWriter stripped = new StringWriter(markup.length());
+        Reader filter = null;
+        try {
+          filter = new HTMLStripCharFilter(
+              CharReader.get(new StringReader(markup.toString())));
+          IOUtils.copy(filter, stripped);
+          return stripped.toString();
+        } catch (IOException e) {
+          // best effort: if stripping fails, keep the original value
+          return markup;
+        } finally {
+          IOUtils.closeQuietly(filter);
+        }
+      }
+    };
+  }
+}
+
diff --git a/solr/core/src/java/org/apache/solr/update/processor/LastFieldValueUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/LastFieldValueUpdateProcessorFactory.java
new file mode 100644
index 00000000000..491328ebb9f
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/update/processor/LastFieldValueUpdateProcessorFactory.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.core.SolrCore; + +import java.util.Collections; +import java.util.Collection; +import java.util.List; +import java.util.SortedSet; +import java.util.Iterator; + +/** + * Keeps only the last value of fields matching the specified + * conditions. Correct behavior assumes that the SolrInputFields being mutated + * are either single valued, or use an ordered Collection (ie: not a Set). + *

+ * By default, this processor matches no fields. + *

+ * + *

+ * For example, in the configuration below, if a field named + * primary_author contained multiple values (ie: + * "Adam Doe", "Bob Smith", "Carla Jones") then only the last + * value (ie: "Carla Jones") will be kept + *

+ * + *
+ * <processor class="solr.LastFieldValueUpdateProcessorFactory">
+ *   <str name="fieldName">primary_author</str>
+ * </processor>
+ * 
+ *
+ * @see FirstFieldValueUpdateProcessorFactory
+ */
+public final class LastFieldValueUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {
+
+  /**
+   * Keeps the last value of the collection: the final element of a List,
+   * the last() element of a SortedSet, or otherwise whichever value the
+   * iterator produces last.
+   *
+   * @param values the candidate values
+   * @return a single-element list holding the last value
+   */
+  @Override
+  public Collection pickSubset(Collection values) {
+
+    final Object last;
+
+    if (values instanceof List) {
+      // direct index lookup, no iteration needed
+      final List ordered = (List) values;
+      last = ordered.get(ordered.size() - 1);
+    } else if (values instanceof SortedSet) {
+      // direct tail lookup, no iteration needed
+      last = ((SortedSet) values).last();
+    } else {
+      // iteration order is taken at face value
+      Object seen = null;
+      final Iterator it = values.iterator();
+      while (it.hasNext()) {
+        seen = it.next();
+      }
+      last = seen;
+    }
+
+    return Collections.singletonList(last);
+  }
+
+  /**
+   * Default selector for this factory: matches no fields.
+   */
+  @Override
+  public FieldMutatingUpdateProcessor.FieldNameSelector getDefaultSelector(final SolrCore core) {
+    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
+  }
+
+}
+
diff --git a/solr/core/src/java/org/apache/solr/update/processor/MaxFieldValueUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/MaxFieldValueUpdateProcessorFactory.java
new file mode 100644
index 00000000000..e16b1ab1179
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/update/processor/MaxFieldValueUpdateProcessorFactory.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */ + +package org.apache.solr.update.processor; + +import org.apache.solr.core.SolrCore; + +import java.util.Collections; +import java.util.Collection; +import java.util.Iterator; + +/** + * An update processor that keeps only the maximum value from any selected + * fields where multiple values are found. Correct behavior assumes that all + * of the values in the SolrInputFields being mutated are mutually comparable; + * if this is not the case, then the full list of all values found will be + * used as is. + * 

+ * By default, this processor matches no fields. + *

+ * + *

+ * In the example configuration below, if a document contains multiple integer + * values (ie: 64, 128, 1024) in the field + * largestFileSize then only the biggest value + * (ie: 1024) will be kept in that field. + *

+ * + *

+ *  <processor class="solr.MaxFieldValueUpdateProcessorFactory">
+ *    <str name="fieldName">largestFileSize</str>
+ *  </processor>
+ * 
+ *
+ * @see MinFieldValueUpdateProcessorFactory
+ * @see Collections#max
+ */
+public final class MaxFieldValueUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {
+
+  /**
+   * Keeps only the largest value as determined by {@link Collections#max}.
+   * If that lookup fails with a ClassCastException, the values are returned
+   * unchanged.
+   *
+   * @param values the candidate values
+   * @return a single-element list holding the maximum, or values itself
+   */
+  @Override
+  @SuppressWarnings("unchecked")
+  public Collection pickSubset(Collection values) {
+    try {
+      final Object largest = Collections.max((Collection)values);
+      return Collections.singletonList(largest);
+    } catch (ClassCastException e) {
+      // deliberately ignored: fall back to keeping every value
+      return values;
+    }
+  }
+
+  /**
+   * Default selector for this factory: matches no fields.
+   */
+  @Override
+  public FieldMutatingUpdateProcessor.FieldNameSelector getDefaultSelector(final SolrCore core) {
+    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
+  }
+
+}
+
diff --git a/solr/core/src/java/org/apache/solr/update/processor/MinFieldValueUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/MinFieldValueUpdateProcessorFactory.java
new file mode 100644
index 00000000000..84fdb285f93
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/update/processor/MinFieldValueUpdateProcessorFactory.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */ + +package org.apache.solr.update.processor; + +import org.apache.solr.core.SolrCore; + +import java.util.Collections; +import java.util.Collection; +import java.util.Iterator; + +/** + * An update processor that keeps only the minimum value from any selected + * fields where multiple values are found. Correct behavior assumes that all + * of the values in the SolrInputFields being mutated are mutually comparable; + * if this is not the case, then the full list of all values found will be + * used as is. + * 

+ * By default, this processor matches no fields. + *

+ * + *

+ * In the example configuration below, if a document contains multiple integer + * values (ie: 64, 128, 1024) in the field + * smallestFileSize then only the smallest value + * (ie: 64) will be kept in that field. + *

+ * + *

+ *  <processor class="solr.MinFieldValueUpdateProcessorFactory">
+ *    <str name="fieldName">smallestFileSize</str>
+ *  </processor>
+ * 
+ *
+ * @see MaxFieldValueUpdateProcessorFactory
+ * @see Collections#min
+ */
+public final class MinFieldValueUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {
+
+  /**
+   * Keeps only the smallest value as determined by {@link Collections#min}.
+   * If that lookup fails with a ClassCastException, the values are returned
+   * unchanged.
+   *
+   * @param values the candidate values
+   * @return a single-element list holding the minimum, or values itself
+   */
+  @Override
+  @SuppressWarnings("unchecked")
+  public Collection pickSubset(Collection values) {
+    try {
+      final Object smallest = Collections.min((Collection)values);
+      return Collections.singletonList(smallest);
+    } catch (ClassCastException e) {
+      // deliberately ignored: fall back to keeping every value
+      return values;
+    }
+  }
+
+  /**
+   * Default selector for this factory: matches no fields.
+   */
+  @Override
+  public FieldMutatingUpdateProcessor.FieldNameSelector getDefaultSelector(final SolrCore core) {
+    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
+  }
+
+}
+
diff --git a/solr/core/src/java/org/apache/solr/update/processor/RegexReplaceProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/RegexReplaceProcessorFactory.java
new file mode 100644
index 00000000000..2b1ba1cda38
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/update/processor/RegexReplaceProcessorFactory.java
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */ +package org.apache.solr.update.processor; + +import org.apache.solr.core.SolrCore; + +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.util.NamedList; + +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; + +import java.util.regex.PatternSyntaxException; +import java.util.regex.Pattern; +import java.util.regex.Matcher; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * An update processor that applies a configured regex to any + * CharSequence values found in the selected fields, and replaces + * any matches with the configured replacement string. + * 

+ * By default this processor applies itself to no fields. + *

+ * + *

+ * For example, with the configuration listed below, any sequence of multiple + * whitespace characters found in values for field named title + * or content will be replaced by a single space character. + *

+ * + *
+ * <processor class="solr.RegexReplaceProcessorFactory">
+ *   <str name="fieldName">content</str>
+ *   <str name="fieldName">title</str>
+ *   <str name="pattern">\s+</str>
+ *   <str name="replacement"> </str>
+ * </processor>
+ * 
+ *
+ * @see java.util.regex.Pattern
+ */
+public final class RegexReplaceProcessorFactory extends FieldMutatingUpdateProcessorFactory {
+
+  private static final Logger log = LoggerFactory.getLogger(RegexReplaceProcessorFactory.class);
+
+  private static final String REPLACEMENT_PARAM = "replacement";
+  private static final String PATTERN_PARAM = "pattern";
+
+  private Pattern pattern;
+  private String replacement;
+
+  /**
+   * Reads the required "pattern" and "replacement" init params, removing
+   * them from args, and then passes the remaining args to the superclass.
+   * The pattern is compiled once here; the replacement is quoted with
+   * {@link Matcher#quoteReplacement} so it is always used literally.
+   *
+   * @param args the init params for this factory
+   * @throws SolrException with SERVER_ERROR if either param is missing or
+   *         the pattern is not a valid regex
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  public void init(NamedList args) {
+
+    // Check the removed value for null BEFORE calling toString(), so that a
+    // missing param yields the descriptive SolrException below instead of
+    // a NullPointerException.
+    final Object patternParam = args.remove(PATTERN_PARAM);
+    if (patternParam == null) {
+      throw new SolrException(ErrorCode.SERVER_ERROR,
+                              "Missing required init parameter: " + PATTERN_PARAM);
+    }
+    try {
+      pattern = Pattern.compile(patternParam.toString());
+    } catch (PatternSyntaxException e) {
+      throw new SolrException(ErrorCode.SERVER_ERROR,
+                              "Invalid regex: " + patternParam, e);
+    }
+
+    final Object replacementParam = args.remove(REPLACEMENT_PARAM);
+    if (replacementParam == null) {
+      throw new SolrException(ErrorCode.SERVER_ERROR,
+                              "Missing required init parameter: " + REPLACEMENT_PARAM);
+    }
+    replacement = Matcher.quoteReplacement(replacementParam.toString());
+
+    super.init(args);
+  }
+
+  /**
+   * Default selector for this factory: matches no fields.
+   *
+   * @see FieldMutatingUpdateProcessor#SELECT_NO_FIELDS
+   */
+  @Override
+  protected FieldMutatingUpdateProcessor.FieldNameSelector
+    getDefaultSelector(final SolrCore core) {
+
+    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
+
+  }
+
+  /**
+   * Creates a processor that replaces every match of the configured pattern
+   * in each CharSequence value of the selected fields with the configured
+   * replacement.  Values of any other type are passed through unchanged.
+   */
+  @Override
+  public UpdateRequestProcessor getInstance(SolrQueryRequest request,
+                                            SolrQueryResponse response,
+                                            UpdateRequestProcessor next) {
+    return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
+      @Override
+      protected Object mutateValue(final Object src) {
+        if (src instanceof CharSequence) {
+          CharSequence txt = (CharSequence)src;
+          return pattern.matcher(txt).replaceAll(replacement);
+        }
+        return src;
+      }
+    };
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/update/processor/RemoveBlankFieldUpdateProcessorFactory.java 
b/solr/core/src/java/org/apache/solr/update/processor/RemoveBlankFieldUpdateProcessorFactory.java new file mode 100644 index 00000000000..86a0ca4e689 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/RemoveBlankFieldUpdateProcessorFactory.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; + +/** + * Removes any values found which are CharSequence with a length of 0. + * (ie: empty strings) + *

+ * By default this processor applies itself to all fields. + *

+ * + *

+ * For example, with the configuration listed below, blank strings will be + * removed from all fields except those whose name ends with + * "_literal". + *

+ * + *
+ * <processor class="solr.RemoveBlankFieldUpdateProcessorFactory">
+ *   <lst name="exclude">
+ *     <str name="fieldRegex">.*_literal</str>
+ *   </lst>
+ * </processor>
+ * 
+ *
+ */
+public final class RemoveBlankFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {
+
+  /**
+   * Passes args straight to the superclass; this factory reads no init
+   * params of its own.
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  public void init(NamedList args) {
+    // nothing specific to this processor to read from args
+    super.init(args);
+  }
+
+  /**
+   * Creates a processor that deletes every zero-length CharSequence value
+   * from the selected fields and leaves all other values as they are.
+   */
+  @Override
+  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
+                                            SolrQueryResponse rsp,
+                                            UpdateRequestProcessor next) {
+    return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
+      protected Object mutateValue(final Object src) {
+        final boolean blank = src instanceof CharSequence
+          && ((CharSequence)src).length() == 0;
+        return blank ? DELETE_VALUE_SINGLETON : src;
+      }
+    };
+  }
+}
+
diff --git a/solr/core/src/java/org/apache/solr/update/processor/TrimFieldUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/TrimFieldUpdateProcessorFactory.java
new file mode 100644
index 00000000000..1754ac3a2fc
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/update/processor/TrimFieldUpdateProcessorFactory.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */ + +package org.apache.solr.update.processor; + +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; + + +/** + * Trims leading and trailing whitespace from any CharSequence values + * found in fields matching the specified conditions and returns the + * resulting String. + *

+ * By default this processor matches all fields + *

+ * + *

For example, with the configuration listed below, all String field values + * will have leading and trailing spaces removed except for fields whose + * name ends with "_literal". + * 

+ *
+ * <processor class="solr.TrimFieldUpdateProcessorFactory">
+ *   <lst name="exclude">
+ *     <str name="fieldRegex">.*_literal</str>
+ *   </lst>
+ * </processor>
+ * 
+ */
+public final class TrimFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {
+
+  /**
+   * Passes args straight to the superclass; this factory reads no init
+   * params of its own.
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  public void init(NamedList args) {
+    // nothing specific to trimming to read from args
+    super.init(args);
+  }
+
+  /**
+   * Creates a processor that replaces each CharSequence value of the
+   * selected fields with its trimmed String form and leaves values of any
+   * other type as they are.
+   */
+  @Override
+  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
+                                            SolrQueryResponse rsp,
+                                            UpdateRequestProcessor next) {
+    return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
+      protected Object mutateValue(final Object src) {
+        if (!(src instanceof CharSequence)) {
+          return src;
+        }
+        final String text = src.toString();
+        return text.trim();
+      }
+    };
+  }
+}
+
diff --git a/solr/core/src/test-files/solr/conf/solrconfig-update-processor-chains.xml b/solr/core/src/test-files/solr/conf/solrconfig-update-processor-chains.xml
new file mode 100644
index 00000000000..5555dfe7ae7
--- /dev/null
+++ b/solr/core/src/test-files/solr/conf/solrconfig-update-processor-chains.xml
@@ -0,0 +1,235 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + solr.TrieIntField + solr.TrieLongField + + + + min_foo_l + + + max_foo_l + + + ; + + primary_author_s1 + + + + primary_author_s1 + first_foo_l + + + + + + + + + + + + + + foo_t + + + + + + + + + foo_t + + + + + + foo.* + bar.* + + .*HOSS.* + + + + + + foo.* + bar.* + + + solr.DateField + + + .*HOSS.* + + + + + + foo.* + bar.* + + + solr.DateField + .*HOSS.* + + + + + + + name + foo_t + + + + + + name + foo_t + + + + + + + foo.* + bar.*_s + + + + + + nametext + text_sw + + + + + + solr.DateField + solr.StrField + + + + + + solr.DateField + solr.StrField + + foo.* + + + + + + + + + + + + + + + + + + foo.* + yak.* + + + + + + + + + + + foo_s + + + + + string + ; + + + + + + foo_s + bar_s + + + + + foo_s + bar_s + + + + + foo_i + foo_s + bar_s + + + + + foo_i + foo_s + bar_s + + + + + + html_s + + + + + + + content + title + \s+ + X + + + + diff --git a/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java 
b/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java new file mode 100644 index 00000000000..d1ecb1d4054 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java @@ -0,0 +1,674 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.update.processor; + +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.TreeSet; +import java.util.HashMap; +import java.util.Map; +import java.util.Arrays; +import java.io.IOException; + +import org.apache.solr.SolrTestCaseJ4; + +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.SolrParams; + +import org.apache.solr.core.SolrCore; + +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.request.LocalSolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; + +import org.apache.solr.update.AddUpdateCommand; +import org.apache.solr.update.processor.UpdateRequestProcessor; +import org.apache.solr.update.processor.UpdateRequestProcessorChain; + +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Tests the basics of configuring FieldMutatingUpdateProcessors + * (mainly via TrimFieldUpdateProcessor) and the logic of other various + * subclasses. 
+ */ +public class FieldMutatingUpdateProcessorTest extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-update-processor-chains.xml", "schema12.xml"); + } + + public void testComprehensive() throws Exception { + + final String countMe = "how long is this string?"; + final int count = countMe.length(); + + processAdd("comprehensive", + doc(f("id", "1111"), + f("primary_author_s1", + "XXXX", "Adam", "Sam"), + f("all_authors_s1", + "XXXX", "Adam", "Sam"), + f("foo_is", countMe, new Integer(42)), + f("first_foo_l", countMe, new Integer(-34)), + f("max_foo_l", countMe, new Integer(-34)), + f("min_foo_l", countMe, new Integer(-34)))); + + assertU(commit()); + + assertQ(req("id:1111") + ,"//str[@name='primary_author_s1'][.='XXXX']" + ,"//str[@name='all_authors_s1'][.='XXXX; Adam; Sam']" + ,"//arr[@name='foo_is']/int[1][.='"+count+"']" + ,"//arr[@name='foo_is']/int[2][.='42']" + ,"//long[@name='max_foo_l'][.='"+count+"']" + ,"//long[@name='first_foo_l'][.='"+count+"']" + ,"//long[@name='min_foo_l'][.='-34']" + ); + + + + } + + + + public void testTrimAll() throws Exception { + SolrInputDocument d = null; + + d = processAdd("trim-all", + doc(f("id", "1111"), + f("name", " Hoss ", new StringBuilder(" Man")), + f("foo_t", " some text ", "other Text\t"), + f("foo_d", new Integer(42)), + field("foo_s", 5.0F, " string "))); + + assertNotNull(d); + + // simple stuff + assertEquals("string", d.getFieldValue("foo_s")); + assertEquals(Arrays.asList("some text","other Text"), + d.getFieldValues("foo_t")); + assertEquals(Arrays.asList("Hoss","Man"), + d.getFieldValues("name")); + + // slightly more interesting + assertEquals("processor borked non string value", + new Integer(42), d.getFieldValue("foo_d")); + assertEquals("wrong boost", + 5.0F, d.getField("foo_s").getBoost(), 0.0F); + } + + public void testTrimFields() throws Exception { + for (String chain : Arrays.asList("trim-fields", "trim-fields-arr")) { + 
SolrInputDocument d = null; + d = processAdd(chain, + doc(f("id", "1111"), + f("name", " Hoss ", " Man"), + f("foo_t", " some text ", "other Text\t"), + f("foo_s", " string "))); + + assertNotNull(d); + + assertEquals(" string ", d.getFieldValue("foo_s")); + assertEquals(Arrays.asList("some text","other Text"), + d.getFieldValues("foo_t")); + assertEquals(Arrays.asList("Hoss","Man"), + d.getFieldValues("name")); + } + } + + /** Runs the "trim-field" chain: expects the foo_t values trimmed, foo_s and name left untouched. */ public void testTrimField() throws Exception { + SolrInputDocument d = null; + d = processAdd("trim-field", + doc(f("id", "1111"), + f("name", " Hoss ", " Man"), + f("foo_t", " some text ", "other Text\t"), + f("foo_s", " string "))); + + assertNotNull(d); + + assertEquals(" string ", d.getFieldValue("foo_s")); + assertEquals(Arrays.asList("some text","other Text"), + d.getFieldValues("foo_t")); + assertEquals(Arrays.asList(" Hoss "," Man"), + d.getFieldValues("name")); + } + + /** Runs the "trim-field-regexes" chain: expects foo_t, foozat_s and bar_s trimmed, bar_t left untouched. */ public void testTrimRegex() throws Exception { + SolrInputDocument d = null; + d = processAdd("trim-field-regexes", + doc(f("id", "1111"), + f("foo_t", " string1 "), + f("foozat_s", " string2 "), + f("bar_t", " string3 "), + f("bar_s", " string4 "))); + + assertNotNull(d); + + assertEquals("string1", d.getFieldValue("foo_t")); + assertEquals("string2", d.getFieldValue("foozat_s")); + assertEquals(" string3 ", d.getFieldValue("bar_t")); + assertEquals("string4", d.getFieldValue("bar_s")); + + } + + /** Runs the "trim-types" chain: expects foo_sw, name and title trimmed, bar_t and bar_s left untouched. */ public void testTrimTypes() throws Exception { + SolrInputDocument d = null; + d = processAdd("trim-types", + doc(f("id", "1111"), + f("foo_sw", " string0 "), + f("name", " string1 "), + f("title", " string2 "), + f("bar_t", " string3 "), + f("bar_s", " string4 "))); + + assertNotNull(d); + + assertEquals("string0", d.getFieldValue("foo_sw")); + assertEquals("string1", d.getFieldValue("name")); + assertEquals("string2", d.getFieldValue("title")); + assertEquals(" string3 ", d.getFieldValue("bar_t")); + assertEquals(" string4 ", d.getFieldValue("bar_s")); + + } + + /** Runs the "trim-classes" chain: expects foo_s, bar_dt and bar_pdt trimmed, foo_t left untouched. */ public void 
testTrimClasses() throws Exception { + SolrInputDocument d = null; + d = processAdd("trim-classes", + doc(f("id", "1111"), + f("foo_t", " string1 "), + f("foo_s", " string2 "), + f("bar_dt", " string3 "), + f("bar_pdt", " string4 "))); + + assertNotNull(d); + + assertEquals(" string1 ", d.getFieldValue("foo_t")); + assertEquals("string2", d.getFieldValue("foo_s")); + assertEquals("string3", d.getFieldValue("bar_dt")); + assertEquals("string4", d.getFieldValue("bar_pdt")); + + } + + /** Runs the "trim-multi" chain: expects foo_s and foo_pdt trimmed, foo_t and bar_dt left untouched. */ public void testTrimMultipleRules() throws Exception { + SolrInputDocument d = null; + d = processAdd("trim-multi", + doc(f("id", "1111"), + f("foo_t", " string1 "), + f("foo_s", " string2 "), + f("bar_dt", " string3 "), + f("foo_pdt", " string4 "))); + + assertNotNull(d); + + assertEquals(" string1 ", d.getFieldValue("foo_t")); + assertEquals("string2", d.getFieldValue("foo_s")); + assertEquals(" string3 ", d.getFieldValue("bar_dt")); + assertEquals("string4", d.getFieldValue("foo_pdt")); + + } + + /** Runs the "trim-most", "trim-many", "trim-few" and "trim-some" chains in turn and checks, per chain, which fields come back trimmed and which keep their whitespace. */ public void testTrimExclusions() throws Exception { + SolrInputDocument d = null; + d = processAdd("trim-most", + doc(f("id", "1111"), + f("foo_t", " string1 "), + f("foo_s", " string2 "), + f("bar_dt", " string3 "), + f("foo_pdt", " string4 "))); + + assertNotNull(d); + + assertEquals(" string1 ", d.getFieldValue("foo_t")); + assertEquals("string2", d.getFieldValue("foo_s")); + assertEquals("string3", d.getFieldValue("bar_dt")); + assertEquals("string4", d.getFieldValue("foo_pdt")); + + d = processAdd("trim-many", + doc(f("id", "1111"), + f("foo_t", " string1 "), + f("foo_s", " string2 "), + f("bar_dt", " string3 "), + f("bar_HOSS_s", " string4 "), + f("foo_pdt", " string5 "), + f("foo_HOSS_pdt", " string6 "))); + + assertNotNull(d); + + assertEquals("string1", d.getFieldValue("foo_t")); + assertEquals("string2", d.getFieldValue("foo_s")); + assertEquals("string3", d.getFieldValue("bar_dt")); + assertEquals(" string4 ", d.getFieldValue("bar_HOSS_s")); + assertEquals("string5", 
d.getFieldValue("foo_pdt")); + assertEquals(" string6 ", d.getFieldValue("foo_HOSS_pdt")); + + d = processAdd("trim-few", + doc(f("id", "1111"), + f("foo_t", " string1 "), + f("foo_s", " string2 "), + f("bar_dt", " string3 "), + f("bar_HOSS_s", " string4 "), + f("foo_pdt", " string5 "), + f("foo_HOSS_pdt", " string6 "))); + + assertNotNull(d); + + assertEquals("string1", d.getFieldValue("foo_t")); + assertEquals("string2", d.getFieldValue("foo_s")); + assertEquals(" string3 ", d.getFieldValue("bar_dt")); + assertEquals(" string4 ", d.getFieldValue("bar_HOSS_s")); + assertEquals(" string5 ", d.getFieldValue("foo_pdt")); + assertEquals(" string6 ", d.getFieldValue("foo_HOSS_pdt")); + + d = processAdd("trim-some", + doc(f("id", "1111"), + f("foo_t", " string1 "), + f("foo_s", " string2 "), + f("bar_dt", " string3 "), + f("bar_HOSS_s", " string4 "), + f("foo_pdt", " string5 "), + f("foo_HOSS_pdt", " string6 "))); + + assertNotNull(d); + + assertEquals("string1", d.getFieldValue("foo_t")); + assertEquals("string2", d.getFieldValue("foo_s")); + assertEquals("string3", d.getFieldValue("bar_dt")); + assertEquals("string4", d.getFieldValue("bar_HOSS_s")); + assertEquals("string5", d.getFieldValue("foo_pdt")); + assertEquals(" string6 ", d.getFieldValue("foo_HOSS_pdt")); + } + + /** Runs the "remove-all-blanks" chain: expects empty-string values dropped, yak_t (whose only value was empty) absent from the document, and the Integer foo_d unchanged. */ public void testRemoveBlanks() throws Exception { + SolrInputDocument d = null; + d = processAdd("remove-all-blanks", + doc(f("id", "1111"), + f("foo_s", "string1", ""), + f("bar_dt", "string2", "", "string3"), + f("yak_t", ""), + f("foo_d", new Integer(42)))); + + assertNotNull(d); + + assertEquals(Arrays.asList("string1"), + d.getFieldValues("foo_s")); + assertEquals(Arrays.asList("string2","string3"), + d.getFieldValues("bar_dt")); + assertFalse("shouldn't be any values for yak_t", + d.containsKey("yak_t")); + assertEquals("processor borked non string value", + new Integer(42), d.getFieldValue("foo_d")); + + } + + /** Runs "length-none", expecting every value unchanged, then "length-some", expecting the foo_s and yak_t strings replaced by their Integer lengths while bar_dt and the Integer foo_d stay as they were. */ public void testStrLength() throws Exception { + SolrInputDocument d = null; + d = 
processAdd("length-none", + doc(f("id", "1111"), + f("foo_s", "string1", "string222"), + f("bar_dt", "string3"), + f("yak_t", ""), + f("foo_d", new Integer(42)))); + + assertNotNull(d); + + assertEquals(Arrays.asList("string1","string222"), + d.getFieldValues("foo_s")); + assertEquals("string3", d.getFieldValue("bar_dt")); + assertEquals("", d.getFieldValue("yak_t")); + assertEquals("processor borked non string value", + new Integer(42), d.getFieldValue("foo_d")); + + d = processAdd("length-some", + doc(f("id", "1111"), + f("foo_s", "string1", "string222"), + f("bar_dt", "string3"), + f("yak_t", ""), + f("foo_d", new Integer(42)))); + + assertNotNull(d); + + assertEquals(Arrays.asList(new Integer(7), new Integer(9)), + d.getFieldValues("foo_s")); + assertEquals("string3", d.getFieldValue("bar_dt")); + assertEquals(new Integer(0), d.getFieldValue("yak_t")); + assertEquals("processor borked non string value", + new Integer(42), d.getFieldValue("foo_d")); + } + + /** Runs the "regex-replace" chain: expects every run of whitespace in content and title replaced by a single X. */ public void testRegexReplace() throws Exception { + SolrInputDocument d = null; + d = processAdd("regex-replace", + doc(f("id", "doc1"), + f("content", "This is a text\t with a lot\n of whitespace"), + f("title", "This\ttitle has a lot of spaces"))); + + assertNotNull(d); + + assertEquals("ThisXisXaXtextXwithXaXlotXofXwhitespace", + d.getFieldValue("content")); + assertEquals("ThisXtitleXhasXaXlotXofXspaces", + d.getFieldValue("title")); + } + + /** Runs the "first-value" chain: expects foo_s and bar_s reduced to their first value, yak_t left with both values. */ public void testFirstValue() throws Exception { + SolrInputDocument d = null; + + d = processAdd("first-value", + doc(f("id", "1111"), + f("foo_s", "string1", "string222"), + f("bar_s", "string3"), + f("yak_t", "string4", "string5"))); + + assertNotNull(d); + + assertEquals(Arrays.asList("string1"), + d.getFieldValues("foo_s")); + assertEquals(Arrays.asList("string3"), + d.getFieldValues("bar_s")); + assertEquals(Arrays.asList("string4", "string5"), + d.getFieldValues("yak_t")); + } + + /** Runs the "last-value" chain: expects foo_s and bar_s reduced to their last value and yak_t left alone, then repeats with foo_s values held in a TreeSet, a List and a LinkedHashSet, expecting "last" each time. */ public void testLastValue() throws Exception { + SolrInputDocument d = null; + + 
// basics + + d = processAdd("last-value", + doc(f("id", "1111"), + f("foo_s", "string1", "string222"), + f("bar_s", "string3"), + f("yak_t", "string4", "string5"))); + + assertNotNull(d); + + assertEquals(Arrays.asList("string222"), + d.getFieldValues("foo_s")); + assertEquals(Arrays.asList("string3"), + d.getFieldValues("bar_s")); + assertEquals(Arrays.asList("string4", "string5"), + d.getFieldValues("yak_t")); + + // test optimizations (and force test of defaults) + + SolrInputField special = null; + + // test something that's definitely a SortedSet + + special = new SolrInputField("foo_s"); + special.setValue(new TreeSet + (Arrays.asList("ggg", "first", "last", "hhh")), 1.2F); + + d = processAdd("last-value", + doc(f("id", "1111"), + special)); + + assertNotNull(d); + + assertEquals("last", d.getFieldValue("foo_s")); + + // test something that's definitely a List + + special = new SolrInputField("foo_s"); + special.setValue(Arrays.asList("first", "ggg", "hhh", "last"), 1.2F); + + d = processAdd("last-value", + doc(f("id", "1111"), + special)); + + assertNotNull(d); + + assertEquals("last", d.getFieldValue("foo_s")); + + // test something that is definitely not a List or SortedSet + // (ie: get default behavior of Collection using iterator) + + special = new SolrInputField("foo_s"); + special.setValue(new LinkedHashSet + (Arrays.asList("first", "ggg", "hhh", "last")), 1.2F); + + d = processAdd("last-value", + doc(f("id", "1111"), + special)); + + assertNotNull(d); + + assertEquals("last", d.getFieldValue("foo_s")); + + + } + + /** Runs the "min-value" chain: expects foo_s, foo_i and bar_s reduced to their smallest value and yak_t left alone, then checks that a field mixing String and Integer values is passed through unchanged instead of failing. */ public void testMinValue() throws Exception { + SolrInputDocument d = null; + + d = processAdd("min-value", + doc(f("id", "1111"), + f("foo_s", "zzz", "aaa", "bbb"), + f("foo_i", 42, 128, -3), + f("bar_s", "aaa"), + f("yak_t", "aaa", "bbb"))); + + assertNotNull(d); + + assertEquals(Arrays.asList("aaa"), + d.getFieldValues("foo_s")); + assertEquals(Arrays.asList(-3), + d.getFieldValues("foo_i")); + assertEquals(Arrays.asList("aaa"), + 
d.getFieldValues("bar_s")); + assertEquals(Arrays.asList("aaa", "bbb"), + d.getFieldValues("yak_t")); + + // uncomparable should not fail + + d = processAdd("min-value", + doc(f("id", "1111"), + f("foo_s", "zzz", new Integer(42), "bbb"), + f("bar_s", "aaa"), + f("yak_t", "aaa", "bbb"))); + + assertNotNull(d); + + assertEquals(Arrays.asList("zzz", new Integer(42), "bbb"), + d.getFieldValues("foo_s")); + assertEquals(Arrays.asList("aaa"), + d.getFieldValues("bar_s")); + assertEquals(Arrays.asList("aaa", "bbb"), + d.getFieldValues("yak_t")); + + + } + + /** Runs the "max-value" chain: expects foo_s, foo_i and bar_s reduced to their largest value and yak_t left alone, then checks that a field mixing String and Integer values is passed through unchanged instead of failing. */ public void testMaxValue() throws Exception { + SolrInputDocument d = null; + + d = processAdd("max-value", + doc(f("id", "1111"), + f("foo_s", "zzz", "aaa", "bbb"), + f("foo_i", 42, 128, -3), + f("bar_s", "aaa"), + f("yak_t", "aaa", "bbb"))); + + assertNotNull(d); + + assertEquals(Arrays.asList("zzz"), + d.getFieldValues("foo_s")); + assertEquals(Arrays.asList(128), + d.getFieldValues("foo_i")); + assertEquals(Arrays.asList("aaa"), + d.getFieldValues("bar_s")); + assertEquals(Arrays.asList("aaa", "bbb"), + d.getFieldValues("yak_t")); + + // uncomparable should not fail + + d = processAdd("max-value", + doc(f("id", "1111"), + f("foo_s", "zzz", new Integer(42), "bbb"), + f("bar_s", "aaa"), + f("yak_t", "aaa", "bbb"))); + + assertNotNull(d); + + assertEquals(Arrays.asList("zzz", new Integer(42), "bbb"), + d.getFieldValues("foo_s")); + assertEquals(Arrays.asList("aaa"), + d.getFieldValues("bar_s")); + assertEquals(Arrays.asList("aaa", "bbb"), + d.getFieldValues("yak_t")); + + + } + + /** Runs the "html-strip" chain and compares html_s and bar_s with fixed expected strings. NOTE(review): the input literals shown here carry no markup and equal the expected output, so as written this cannot tell stripping from a no-op; confirm the literals against the committed file. */ public void testHtmlStrip() throws Exception { + SolrInputDocument d = null; + + d = processAdd("html-strip", + doc(f("id", "1111"), + f("html_s", "hi & bye", "aaa", "bbb"), + f("bar_s", "hi & bye"))); + + assertNotNull(d); + + assertEquals(Arrays.asList("hi & bye", "aaa", "bbb"), + d.getFieldValues("html_s")); + assertEquals("hi & bye", d.getFieldValue("bar_s")); + + } + + /** Runs the "concat-defaults" chain: expects the id and foo_s1 values joined with ", ", the attr_foo, bar_dt and bar_HOSS_s values kept separate, and the Integer foo_d unchanged. */ public void testConcatDefaults() throws Exception { + 
SolrInputDocument d = null; + d = processAdd("concat-defaults", + doc(f("id", "1111", "222"), + f("attr_foo", "string1", "string2"), + f("foo_s1", "string3", "string4"), + f("bar_dt", "string5", "string6"), + f("bar_HOSS_s", "string7", "string8"), + f("foo_d", new Integer(42)))); + + assertNotNull(d); + + assertEquals("1111, 222", d.getFieldValue("id")); + assertEquals(Arrays.asList("string1","string2"), + d.getFieldValues("attr_foo")); + assertEquals("string3, string4", d.getFieldValue("foo_s1")); + assertEquals(Arrays.asList("string5","string6"), + d.getFieldValues("bar_dt")); + assertEquals(Arrays.asList("string7","string8"), + d.getFieldValues("bar_HOSS_s")); + assertEquals("processor borked non string value", + new Integer(42), d.getFieldValue("foo_d")); + + } + + /** Runs the shared delimiter check against the "concat-field" chain, expecting ", " as the separator. */ public void testConcatExplicit() throws Exception { + doSimpleDelimTest("concat-field", ", "); + } + /** Runs the shared delimiter check against the "concat-type-delim" chain, expecting "; " as the separator. */ public void testConcatExplicitWithDelim() throws Exception { + doSimpleDelimTest("concat-type-delim", "; "); + } + /** Runs the given chain and expects the two foo_s values joined with delim and the 3.0 field boost kept, foo_t left with both values, and the Integer foo_d unchanged. */ private void doSimpleDelimTest(final String chain, final String delim) + throws Exception { + + SolrInputDocument d = null; + d = processAdd(chain, + doc(f("id", "1111"), + f("foo_t", "string1", "string2"), + f("foo_d", new Integer(42)), + field("foo_s", 3.0F, "string3", "string4"))); + + assertNotNull(d); + + assertEquals(Arrays.asList("string1","string2"), + d.getFieldValues("foo_t")); + assertEquals("string3" + delim + "string4", d.getFieldValue("foo_s")); + + // slightly more interesting + assertEquals("processor borked non string value", + new Integer(42), d.getFieldValue("foo_d")); + assertEquals("wrong boost", + 3.0F, d.getField("foo_s").getBoost(), 0.0F); + } + + /** + * Convenience method for building up SolrInputDocuments: each given field is put into a new document under its own name + */ + SolrInputDocument doc(SolrInputField... 
fields) { + SolrInputDocument d = new SolrInputDocument(); + for (SolrInputField f : fields) { + d.put(f.getName(), f); + } + return d; + } + + /** + * Convenience method for building up SolrInputFields: every value is added with boost 1.0, then the field boost is set to the given boost + */ + SolrInputField field(String name, float boost, Object... values) { + SolrInputField f = new SolrInputField(name); + for (Object v : values) { + f.addValue(v, 1.0F); + } + f.setBoost(boost); + return f; + } + + /** + * Convenience method for building up SolrInputFields with default boost (1.0) + */ + SolrInputField f(String name, Object... values) { + return field(name, 1.0F, values); + } + + + /** + * Runs a document through the specified chain, and returns the final + * document used when the chain is completed (NOTE: some chains may + * modify the document in place). Asserts that the named chain exists, and closes the request it creates even when processing throws. + */ + SolrInputDocument processAdd(final String chain, + final SolrInputDocument docIn) + throws IOException { + + SolrCore core = h.getCore(); + UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain); + assertNotNull("No Chain named: " + chain, pc); + + SolrQueryResponse rsp = new SolrQueryResponse(); + + SolrQueryRequest req = new LocalSolrQueryRequest + (core, new ModifiableSolrParams()); + try { + AddUpdateCommand cmd = new AddUpdateCommand(req); + cmd.solrDoc = docIn; + + UpdateRequestProcessor processor = pc.createProcessor(req, rsp); + processor.processAdd(cmd); + + return cmd.solrDoc; + } finally { + req.close(); + } + } +}