SOLR-2802: several new UpdateProcessorFactories for modifying fields of documents, along with base classes to make writing these types of classes easier for users

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1242514 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2012-02-09 20:41:21 +00:00
parent e825015535
commit 4906b8a614
17 changed files with 2478 additions and 0 deletions

View File

@ -207,6 +207,21 @@ New Features
* SOLR-1726: Added deep paging support to search (sort by score only) which should use less memory when paging deeply into results * SOLR-1726: Added deep paging support to search (sort by score only) which should use less memory when paging deeply into results
by keeping the priority queue small. (Manojkumar Rangasamy Kannadasan, gsingers) by keeping the priority queue small. (Manojkumar Rangasamy Kannadasan, gsingers)
* SOLR-2802: New FieldMutatingUpdateProcessor and Factory to simplify the
development of UpdateProcessors that modify field values of documents as
they are indexed. Also includes several useful new implementations:
RemoveBlankFieldUpdateProcessorFactory
TrimFieldUpdateProcessorFactory
HTMLStripFieldUpdateProcessorFactory
RegexReplaceProcessorFactory
FieldLengthUpdateProcessorFactory
ConcatFieldUpdateProcessorFactory
FirstFieldValueUpdateProcessorFactory
LastFieldValueUpdateProcessorFactory
MinFieldValueUpdateProcessorFactory
MaxFieldValueUpdateProcessorFactory
(hossman, janhoy)
Optimizations Optimizations
---------------------- ----------------------

View File

@ -0,0 +1,124 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TextField;
import org.apache.solr.schema.StrField;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.commons.lang.StringUtils;
/**
* Concatenates multiple values for fields matching the specified
* conditions using a configurable <code>delimiter</code> which defaults
* to "<code>, </code>".
* <p>
* By default, this processor concatenates the values for any field name
* which according to the schema is <code>multiValued="false"</code>
* and uses <code>TextField</code> or <code>StrField</code>
* </p>
*
* <p>
* For example, in the configuration below, any "single valued" string and
* text field which is found to contain multiple values <i>except</i> for
* the <code>primary_author</code> field will be concatenated using the
* string "<code>; </code>" as a delimiter. For the
* <code>primary_author</code> field, the multiple values will be left
* alone for <code>FirstFieldValueUpdateProcessorFactory</code> to deal with.
* </p>
*
* <pre class="prettyprint">
* &lt;updateRequestProcessorChain&gt;
* &lt;processor class="solr.ConcatFieldUpdateProcessorFactory"&gt;
* &lt;str name="delimiter"&gt;; &lt;/str&gt;
* &lt;lst name="exclude"&gt;
* &lt;str name="fieldName"&gt;primary_author&lt;/str&gt;
* &lt;/lst&gt;
* &lt;/processor&gt;
* &lt;processor class="solr.FirstFieldValueUpdateProcessorFactory"&gt;
* &lt;str name="fieldName"&gt;primary_author&lt;/str&gt;
* &lt;/processor&gt;
* &lt;/updateRequestProcessorChain&gt;
* </pre>
*/
public final class ConcatFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {

  /**
   * String placed between the values being joined; overwritten by the
   * "delimiter" init arg when one is supplied.
   */
  String delimiter = ", ";

  /**
   * Pulls the optional "delimiter" init arg out of the args before
   * handing whatever is left to the superclass.
   */
  @SuppressWarnings("unchecked")
  @Override
  public void init(NamedList args) {
    final Object configured = args.remove("delimiter");
    if (configured != null) {
      delimiter = configured.toString();
    }
    super.init(args);
  }

  /**
   * Builds a processor that collapses the values of each selected field
   * into one delimiter-joined String, keeping the field's boost.
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
                                            SolrQueryResponse rsp,
                                            UpdateRequestProcessor next) {
    return new FieldMutatingUpdateProcessor(getSelector(), next) {
      protected SolrInputField mutate(final SolrInputField src) {
        if (1 < src.getValueCount()) {
          final SolrInputField joined = new SolrInputField(src.getName());
          final String text = StringUtils.join(src.getValues(), delimiter);
          joined.setValue(text, src.getBoost());
          return joined;
        }
        // zero or one value: nothing to concatenate
        return src;
      }
    };
  }

  /**
   * Default selection: fields whose type is a TextField or StrField and
   * whose SchemaField is not multiValued.
   */
  @Override
  public FieldMutatingUpdateProcessor.FieldNameSelector
    getDefaultSelector(final SolrCore core) {

    final IndexSchema schema = core.getSchema();
    return new FieldMutatingUpdateProcessor.FieldNameSelector() {
      public boolean shouldMutate(final String fieldName) {

        // the type lookup comes first since it should be fastest;
        // instanceof is false for an unknown (null) type
        final FieldType fieldType = schema.getFieldTypeNoEx(fieldName);
        final boolean stringLike = (fieldType instanceof TextField)
          || (fieldType instanceof StrField);
        if (! stringLike) {
          return false;
        }

        // the SchemaField is only fetched once the type check has passed;
        // it shouldn't be null since the type wasn't, but just in case
        final SchemaField field = schema.getFieldOrNull(fieldName);
        return (null != field) && ! field.multiValued();
      }
    };
  }
}

View File

@ -0,0 +1,80 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.core.SolrCore;
/**
* Replaces any CharSequence values found in fields matching the specified
* conditions with the lengths of those CharSequences (as an Integer).
* <p>
* By default, this processor matches no fields.
* </p>
* <p>For example, with the configuration listed below any documents
* containing String values (such as "<code>abcdef</code>" or
* "<code>xyz</code>") in a field declared in the schema using
* <code>TrieIntField</code> or <code>TrieLongField</code>
* would have those Strings replaced with the length of those fields as an
* Integer
* (ie: <code>6</code> and <code>3</code> respectively)
* </p>
* <pre class="prettyprint">
* &lt;processor class="solr.FieldLengthUpdateProcessorFactory"&gt;
* &lt;arr name="typeClass"&gt;
* &lt;str&gt;solr.TrieIntField&lt;/str&gt;
* &lt;str&gt;solr.TrieLongField&lt;/str&gt;
* &lt;/arr&gt;
* &lt;/processor&gt;
* </pre>
*/
public final class FieldLengthUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {

  /**
   * This factory has no options of its own, so every init arg is passed
   * straight through to the superclass.
   */
  @SuppressWarnings("unchecked")
  @Override
  public void init(NamedList args) {
    // no length specific init args
    super.init(args);
  }

  /**
   * No fields are selected unless the configuration names some.
   */
  @Override
  public FieldMutatingUpdateProcessor.FieldNameSelector
    getDefaultSelector(final SolrCore core) {

    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
  }

  /**
   * Builds a processor that replaces each CharSequence value of a selected
   * field with its length (as an Integer); other values are left untouched.
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
                                            SolrQueryResponse rsp,
                                            UpdateRequestProcessor next) {
    return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
      protected Object mutateValue(final Object src) {
        if (src instanceof CharSequence) {
          // valueOf may reuse cached boxes instead of allocating a new
          // Integer for every value
          return Integer.valueOf(((CharSequence) src).length());
        }
        return src;
      }
    };
  }
}

View File

@ -0,0 +1,283 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import java.io.IOException;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import static org.apache.solr.common.SolrException.ErrorCode.*;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.FieldType;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
/**
* Reusable base class for UpdateProcessors that will consider
* AddUpdateCommands and mutate the values associated with configured
* fields.
* <p>
* Subclasses should override the mutate method to specify how individual
* SolrInputFields identified by the selector associated with this instance
* will be mutated.
* </p>
*
* @see FieldMutatingUpdateProcessorFactory
* @see FieldValueMutatingUpdateProcessor
* @see FieldNameSelector
*/
public abstract class FieldMutatingUpdateProcessor
  extends UpdateRequestProcessor {

  /** Decides, by field name, which fields of an added document get mutated. */
  private final FieldNameSelector selector;

  /**
   * @param selector identifies the fields whose SolrInputFields are passed
   *        to {@link #mutate}
   * @param next the next processor in the chain, handed to the superclass
   */
  public FieldMutatingUpdateProcessor(FieldNameSelector selector,
                                      UpdateRequestProcessor next) {
    super(next);
    this.selector = selector;
  }

  /**
   * Method for mutating SolrInputFields associated with fields identified
   * by the FieldNameSelector associated with this processor
   * @param src the SolrInputField to mutate, may be modified in place and
   *            returned
   * @return the SolrInputField to use in replacing the original (src) value.
   *         If null the field will be removed.
   */
  protected abstract SolrInputField mutate(final SolrInputField src);

  /**
   * Runs {@link #mutate} on every selected field of the document being
   * added, replacing (or removing) the field with the result, and then
   * delegates to the superclass.
   *
   * @exception SolrException if mutate returns a field whose name differs
   *            from the name of the field it was given
   */
  @Override
  public void processAdd(AddUpdateCommand cmd) throws IOException {
    final SolrInputDocument doc = cmd.getSolrInputDocument();

    // make a copy we can iterate over while mutating the doc
    final Collection<String> fieldNames
      = new ArrayList<String>(doc.getFieldNames());

    for (final String fname : fieldNames) {

      if (! selector.shouldMutate(fname)) continue;

      final SolrInputField src = doc.get(fname);
      final SolrInputField dest = mutate(src);
      if (null == dest) {
        doc.remove(fname);
      } else {
        // semantics of what happens if dest has diff name are hard
        // we could treat it as a copy, or a rename
        // for now, don't allow it.
        if (! fname.equals(dest.getName()) ) {
          throw new SolrException(SERVER_ERROR,
                                  "mutate returned field with different name: "
                                  + fname + " => " + dest.getName());
        }
        doc.put(dest.getName(), dest);
      }
    }
    super.processAdd(cmd);
  }

  /**
   * Interface for identifying which fields should be mutated
   */
  public static interface FieldNameSelector {
    public boolean shouldMutate(final String fieldName);
  }

  /** Singleton indicating all fields should be mutated */
  public static final FieldNameSelector SELECT_ALL_FIELDS
    = new FieldNameSelector() {
        public boolean shouldMutate(final String fieldName) {
          return true;
        }
      };

  /** Singleton indicating no fields should be mutated */
  public static final FieldNameSelector SELECT_NO_FIELDS
    = new FieldNameSelector() {
        public boolean shouldMutate(final String fieldName) {
          return false;
        }
      };

  /**
   * Wraps two FieldNameSelectors such that the FieldNameSelector
   * returned matches all fields specified by the "includes" unless they
   * are matched by "excludes"
   * @param includes a selector identifying field names that should be selected
   * @param excludes a selector identifying field names that should
   *        <i>not</i> be selected, even if they are matched by the 'includes'
   *        selector
   * @return Either a new FieldNameSelector or one of the input selectors
   *         if the combination lends itself to optimization.
   */
  public static FieldNameSelector wrap(final FieldNameSelector includes,
                                       final FieldNameSelector excludes) {

    // nothing excluded: the includes selector is already the answer
    if (SELECT_NO_FIELDS == excludes) {
      return includes;
    }

    // everything excluded: nothing can ever be selected
    if (SELECT_ALL_FIELDS == excludes) {
      return SELECT_NO_FIELDS;
    }

    // everything included: only the excludes need to be consulted
    if (SELECT_ALL_FIELDS == includes) {
      return new FieldNameSelector() {
        public boolean shouldMutate(final String fieldName) {
          return ! excludes.shouldMutate(fieldName);
        }
      };
    }

    return new FieldNameSelector() {
      public boolean shouldMutate(final String fieldName) {
        return (includes.shouldMutate(fieldName)
                && ! excludes.shouldMutate(fieldName));
      }
    };
  }

  /**
   * Utility method that can be used to define a FieldNameSelector
   * using the same types of rules as the FieldMutatingUpdateProcessor init
   * code.  This may be useful for Factories that wish to define default
   * selectors in similar terms to what the configuration would look like.
   *
   * @return <code>defSelector</code> if all of the criteria collections are
   *         empty, otherwise a new selector applying the criteria
   * @exception SolrException if any name in <code>typeClasses</code> can not
   *            be resolved by the loader
   * @lucene.internal
   */
  public static FieldNameSelector createFieldNameSelector
    (final SolrResourceLoader loader,
     final IndexSchema schema,
     final Set<String> fields,
     final Set<String> typeNames,
     final Collection<String> typeClasses,
     final Collection<Pattern> regexes,
     final FieldNameSelector defSelector) {

    // resolve the configured class names up front so that a bad name
    // fails here rather than on the first document
    final Collection<Class<?>> classes
      = new ArrayList<Class<?>>(typeClasses.size());

    for (String t : typeClasses) {
      try {
        classes.add(loader.findClass(t));
      } catch (Exception e) {
        throw new SolrException(SERVER_ERROR,
                                "Can't resolve typeClass: " + t, e);
      }
    }

    if (classes.isEmpty() &&
        typeNames.isEmpty() &&
        regexes.isEmpty() &&
        fields.isEmpty()) {
      return defSelector;
    }

    return new ConfigurableFieldNameSelector
      (schema, fields, typeNames, classes, regexes);
  }

  /**
   * Selector built from configured criteria: a field name is selected only
   * if it satisfies every kind of criteria that is non-empty.
   */
  private static final class ConfigurableFieldNameSelector
    implements FieldNameSelector {

    final IndexSchema schema;
    final Set<String> fields;
    final Set<String> typeNames;
    final Collection<Class<?>> classes;
    final Collection<Pattern> regexes;

    private ConfigurableFieldNameSelector(final IndexSchema schema,
                                          final Set<String> fields,
                                          final Set<String> typeNames,
                                          final Collection<Class<?>> classes,
                                          final Collection<Pattern> regexes) {
      this.schema = schema;
      this.fields = fields;
      this.typeNames = typeNames;
      this.classes = classes;
      this.regexes = regexes;
    }

    public boolean shouldMutate(final String fieldName) {

      // order of checks is based on what should be quicker
      // (ie: set lookups faster than looping over instanceOf / matches tests)

      if ( ! (fields.isEmpty() || fields.contains(fieldName)) ) {
        return false;
      }

      // do not consider it an error if the fieldName has no type
      // there might be another processor dealing with it later
      // (the type based checks are simply skipped in that case)
      FieldType t = schema.getFieldTypeNoEx(fieldName);
      if (null != t) {
        if (! (typeNames.isEmpty() || typeNames.contains(t.getTypeName())) ) {
          return false;
        }

        if (! (classes.isEmpty() || instanceOfAny(t, classes)) ) {
          return false;
        }
      }

      if (! (regexes.isEmpty() || matchesAny(fieldName, regexes)) ) {
        return false;
      }

      return true;
    }

    /**
     * returns true if the Object 'o' is an instance of any class in
     * the Collection
     */
    private static boolean instanceOfAny(Object o, Collection<Class<?>> classes) {
      for (Class<?> c : classes) {
        if ( c.isInstance(o) ) return true;
      }
      return false;
    }

    /**
     * returns true if the CharSequence 's' matches any Pattern in the
     * Collection
     */
    private static boolean matchesAny(CharSequence s,
                                      Collection<Pattern> regexes) {
      for (Pattern p : regexes) {
        if (p.matcher(s).matches()) return true;
      }
      return false;
    }
  }
}

View File

@ -0,0 +1,284 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import java.io.IOException;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.solr.core.SolrCore;
import org.apache.solr.common.SolrException;
import static org.apache.solr.common.SolrException.ErrorCode.*;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.FieldType;
import org.apache.solr.util.plugin.SolrCoreAware;
/**
* Base class for implementing Factories for FieldMutatingUpdateProcessors and
* FieldValueMutatingUpdateProcessors.
*
* <p>
* This class provides all of the plumbing for configuring the
* FieldNameSelector using the following init params to specify selection
* criteria...
* </p>
* <ul>
* <li><code>fieldName</code> - selecting specific fields by field name lookup</li>
* <li><code>fieldRegex</code> - selecting specific fields by field name regex match (regexes are checked in the order specified)</li>
* <li><code>typeName</code> - selecting specific fields by fieldType name lookup</li>
* <li><code>typeClass</code> - selecting specific fields by fieldType class lookup, including inheritance and interfaces</li>
* </ul>
*
* <p>
* Each criteria can be specified as either an &lt;arr&gt; of &lt;str&gt;, or
* multiple &lt;str&gt; with the same name. When multiple criteria of a
* single type exist, fields must match <b>at least one</b> to be selected.
* If more than one type of criteria exist, fields must match
* <b>at least one of each</b> to be selected.
* </p>
* <p>
* One or more <code>exclude</code> &lt;lst&gt; params may also be specified,
* containing any of the above criteria, identifying fields to be excluded
* from selection even if they match the selection criteria. As with the main
* selection criteria a field must match all of the criteria in a single exclusion
* in order to be excluded, but multiple exclusions may be specified to get an
* <code>OR</code> behavior
* </p>
*
* <p>
* In the ExampleFieldMutatingUpdateProcessorFactory configured below,
* fields will be mutated if the name starts with "foo" <i>or</i> "bar";
* <b>unless</b> the field name contains the substring "SKIP" <i>or</i>
* the fieldType is (or subclasses) DateField. Meaning a field named
* "foo_SKIP" is guaranteed not to be selected, but a field named "bar_smith"
* that uses StrField will be selected.
* </p>
* <pre class="prettyprint">
* &lt;processor class="solr.ExampleFieldMutatingUpdateProcessorFactory"&gt;
* &lt;str name="fieldRegex"&gt;foo.*&lt;/str&gt;
* &lt;str name="fieldRegex"&gt;bar.*&lt;/str&gt;
* &lt;!-- each set of exclusions is checked independently --&gt;
* &lt;lst name="exclude"&gt;
* &lt;str name="fieldRegex"&gt;.*SKIP.*&lt;/str&gt;
* &lt;/lst&gt;
* &lt;lst name="exclude"&gt;
* &lt;str name="typeClass"&gt;solr.DateField&lt;/str&gt;
* &lt;/lst&gt;
* &lt;/processor&gt;
* </pre>
*
* <p>
* Subclasses define the default selection behavior to be applied if no
* criteria is configured by the user. User configured "exclude" criteria
* will be applied to the subclass defined default selector.
* </p>
*
* @see FieldMutatingUpdateProcessor
* @see FieldValueMutatingUpdateProcessor
* @see FieldMutatingUpdateProcessor.FieldNameSelector
*/
public abstract class FieldMutatingUpdateProcessorFactory
extends UpdateRequestProcessorFactory
implements SolrCoreAware {
private static class SelectorParams {
public Set<String> fieldName = Collections.emptySet();
public Set<String> typeName = Collections.emptySet();
public Collection<String> typeClass = Collections.emptyList();
public Collection<Pattern> fieldRegex = Collections.emptyList();
}
private SelectorParams inclusions = new SelectorParams();
private Collection<SelectorParams> exclusions
= new ArrayList<SelectorParams>();
private FieldMutatingUpdateProcessor.FieldNameSelector selector = null;
protected final FieldMutatingUpdateProcessor.FieldNameSelector getSelector() {
if (null != selector) return selector;
throw new SolrException(SERVER_ERROR, "selector was never initialized, "+
" inform(SolrCore) never called???");
}
@SuppressWarnings("unchecked")
private static final SelectorParams parseSelectorParams(NamedList args) {
SelectorParams params = new SelectorParams();
params.fieldName = new HashSet<String>(oneOrMany(args, "fieldName"));
params.typeName = new HashSet<String>(oneOrMany(args, "typeName"));
// we can compile the patterns now
Collection<String> patterns = oneOrMany(args, "fieldRegex");
if (! patterns.isEmpty()) {
params.fieldRegex = new ArrayList<Pattern>(patterns.size());
for (String s : patterns) {
try {
params.fieldRegex.add(Pattern.compile(s));
} catch (PatternSyntaxException e) {
throw new SolrException
(SERVER_ERROR, "Invalid 'fieldRegex' pattern: " + s, e);
}
}
}
// resolve this into actual Class objects later
params.typeClass = oneOrMany(args, "typeClass");
return params;
}
/**
* Handles common initialization related to source fields for
* constructoring the FieldNameSelector to be used.
*
* Will error if any unexpected init args are found, so subclasses should
* remove any subclass-specific init args before calling this method.
*/
@SuppressWarnings("unchecked")
@Override
public void init(NamedList args) {
inclusions = parseSelectorParams(args);
List<Object> excList = args.getAll("exclude");
for (Object excObj : excList) {
if (null == excObj) {
throw new SolrException
(SERVER_ERROR, "'exclude' init param can not be null");
}
if (! (excObj instanceof NamedList) ) {
throw new SolrException
(SERVER_ERROR, "'exclude' init param must be <lst/>");
}
NamedList exc = (NamedList) excObj;
exclusions.add(parseSelectorParams(exc));
if (0 < exc.size()) {
throw new SolrException(SERVER_ERROR,
"Unexpected 'exclude' init sub-param(s): '" +
args.getName(0) + "'");
}
// call once per instance
args.remove("exclude");
}
if (0 < args.size()) {
throw new SolrException(SERVER_ERROR,
"Unexpected init param(s): '" +
args.getName(0) + "'");
}
}
public void inform(final SolrCore core) {
final IndexSchema schema = core.getSchema();
selector =
FieldMutatingUpdateProcessor.createFieldNameSelector
(core.getResourceLoader(),
core.getSchema(),
inclusions.fieldName,
inclusions.typeName,
inclusions.typeClass,
inclusions.fieldRegex,
getDefaultSelector(core));
for (SelectorParams exc : exclusions) {
selector = FieldMutatingUpdateProcessor.wrap
(selector,
FieldMutatingUpdateProcessor.createFieldNameSelector
(core.getResourceLoader(),
core.getSchema(),
exc.fieldName,
exc.typeName,
exc.typeClass,
exc.fieldRegex,
FieldMutatingUpdateProcessor.SELECT_NO_FIELDS));
}
}
/**
* Defines the default selection behavior when the user has not
* configured any specific criteria for selecting fields. The Default
* implementation matches all fields, and should be overridden by subclasses
* as needed.
*
* @see FieldMutatingUpdateProcessor#SELECT_ALL_FIELDS
*/
protected FieldMutatingUpdateProcessor.FieldNameSelector
getDefaultSelector(final SolrCore core) {
return FieldMutatingUpdateProcessor.SELECT_ALL_FIELDS;
}
/**
* Removes all instance of the key from NamedList, returning the Set of
* Strings that key refered to. Throws an error if the key didn't refer
* to one or more strings (or arrays of strings)
* @exception SolrException invalid arr/str structure.
*/
private static Collection<String> oneOrMany(final NamedList args, final String key) {
List<String> result = new ArrayList<String>(args.size() / 2);
final String err = "init arg '" + key + "' must be a string "
+ "(ie: 'str'), or an array (ie: 'arr') containing strings; found: ";
for (Object o = args.remove(key); null != o; o = args.remove(key)) {
if (o instanceof String) {
result.add((String)o);
continue;
}
if (o instanceof Object[]) {
o = Arrays.asList((Object[]) o);
}
if (o instanceof Collection) {
for (Object item : (Collection)o) {
if (! (item instanceof String)) {
throw new SolrException(SERVER_ERROR, err + item.getClass());
}
result.add((String)item);
}
continue;
}
// who knows what the hell we have
throw new SolrException(SERVER_ERROR, err + o.getClass());
}
return result;
}
}

View File

@ -0,0 +1,81 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.common.SolrInputField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Abstract subclass of FieldMutatingUpdateProcessor for implementing
* UpdateProcessors that will mutate all individual values of a selected
* field independently
*
* @see FieldMutatingUpdateProcessorFactory
*/
public abstract class FieldValueMutatingUpdateProcessor
  extends FieldMutatingUpdateProcessor {

  private static final Logger log = LoggerFactory.getLogger(FieldValueMutatingUpdateProcessor.class);

  /**
   * Marker that mutateValue returns to signal that a value should be
   * dropped; it is compared by identity.
   */
  public static final Object DELETE_VALUE_SINGLETON = new Object() {
      public String toString() {
        return "!!Singleton Object Triggering Value Deletion!!";
      }
    };

  public FieldValueMutatingUpdateProcessor(FieldNameSelector selector,
                                           UpdateRequestProcessor next) {
    super(selector, next);
  }

  /**
   * Hook for transforming a single value of a selected field.
   *
   * @param src one value taken from a selected field
   * @return the replacement for src (possibly src itself), or
   *         <code>DELETE_VALUE_SINGLETON</code> when the value should be
   *         dropped from the field entirely.
   * @see #DELETE_VALUE_SINGLETON
   */
  protected abstract Object mutateValue(final Object src);

  /**
   * Applies mutateValue to every value of the field, collecting the
   * surviving values in a new field of the same name and boost. Returns
   * null when no values survive.
   */
  protected final SolrInputField mutate(final SolrInputField src) {
    final SolrInputField kept = new SolrInputField(src.getName());

    for (final Object original : src.getValues()) {
      final Object replacement = mutateValue(original);

      if (replacement == DELETE_VALUE_SINGLETON) {
        // the value is skipped, which is what deletes it
        log.debug("removing value from field '{}': {}",
                  src.getName(), original);
        continue;
      }

      if (replacement != original) {
        log.debug("replace value from field '{}': {} with {}",
                  new Object[] { src.getName(), original, replacement });
      }
      kept.addValue(replacement, 1.0F);
    }

    kept.setBoost(src.getBoost());
    if (kept.getValueCount() == 0) {
      return null;
    }
    return kept;
  }
}

View File

@ -0,0 +1,59 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.core.SolrCore;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import java.util.Collection;
/**
* Base class for processors that want to mutate selected fields to only
* keep a subset of the original values.
* @see #pickSubset
*/
public abstract class FieldValueSubsetUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {

  /**
   * Builds a processor that, for each selected field holding more than one
   * value, replaces the field with a new one containing only the values
   * chosen by {@link #pickSubset}; the boost is carried over.
   */
  @Override
  public final UpdateRequestProcessor getInstance(SolrQueryRequest req,
                                                  SolrQueryResponse rsp,
                                                  UpdateRequestProcessor next) {
    return new FieldMutatingUpdateProcessor(getSelector(), next) {
      protected SolrInputField mutate(final SolrInputField src) {
        if (1 < src.getValueCount()) {
          final SolrInputField trimmed = new SolrInputField(src.getName());
          final Collection<Object> chosen = pickSubset(src.getValues());
          trimmed.setValue(chosen, src.getBoost());
          return trimmed;
        }
        // zero or one value: there is nothing to choose between
        return src;
      }
    };
  }

  /**
   * Chooses the values to keep; subclasses must implement this.
   * It is only invoked for collections holding more than one value.
   */
  protected abstract Collection<Object> pickSubset(Collection<Object> values);
}

View File

@ -0,0 +1,65 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.core.SolrCore;
import java.util.Collections;
import java.util.Collection;
import java.util.Iterator;
/**
* Keeps only the first value of fields matching the specified
* conditions. Correct behavior assumes that the SolrInputFields being mutated
* are either single valued, or use an ordered Collection (ie: not a Set).
* <p>
* By default, this processor matches no fields.
* </p>
*
* <p>
* For example, in the configuration below, if a field named
* <code>primary_author</code> contained multiple values (ie:
* <code>"Adam Doe", "Bob Smith", "Carla Jones"</code>) then only the first
* value (ie: <code>"Adam Doe"</code>) will be kept
* </p>
*
* <pre class="prettyprint">
* &lt;processor class="solr.FirstFieldValueUpdateProcessorFactory"&gt;
* &lt;str name="fieldName"&gt;primary_author&lt;/str&gt;
* &lt;/processor&gt;
* </pre>
*
* @see LastFieldValueUpdateProcessorFactory
*/
public final class FirstFieldValueUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {

  /**
   * Keeps only the value that the collection's iterator yields first.
   */
  @Override
  public Collection<Object> pickSubset(Collection<Object> values) {
    // trust the iterator
    final Iterator<Object> cursor = values.iterator();
    final Object first = cursor.next();
    return Collections.singletonList(first);
  }

  /**
   * No fields are selected unless the configuration names some.
   */
  @Override
  public FieldMutatingUpdateProcessor.FieldNameSelector
    getDefaultSelector(final SolrCore core) {

    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
  }
}

View File

@ -0,0 +1,88 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.lucene.analysis.CharReader;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
import org.apache.commons.io.IOUtils;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
/**
* Strips all HTML Markup in any CharSequence values
* found in fields matching the specified conditions.
* <p>
* By default this processor matches no fields
* </p>
*
* <p>For example, with the configuration listed below any documents
* containing HTML markup in any field declared in the schema using
* <code>StrField</code> will have that HTML stripped away.
* </p>
* <pre class="prettyprint">
* &lt;processor class="solr.HTMLStripFieldUpdateProcessorFactory"&gt;
* &lt;str name="typeClass"&gt;solr.StrField&lt;/str&gt;
* &lt;/processor&gt;
* </pre>
*/
public final class HTMLStripFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {

  /**
   * Returns {@link FieldMutatingUpdateProcessor#SELECT_NO_FIELDS} as the
   * default selector.
   */
  @Override
  public FieldMutatingUpdateProcessor.FieldNameSelector
    getDefaultSelector(final SolrCore core) {

    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
  }

  /**
   * Creates a processor that passes every CharSequence value through an
   * {@link HTMLStripCharFilter} and replaces it with the filtered String.
   * Values that are not CharSequences are returned unchanged.
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
                                            SolrQueryResponse rsp,
                                            UpdateRequestProcessor next) {
    return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
      protected Object mutateValue(final Object src) {
        if (!(src instanceof CharSequence)) {
          return src;
        }

        final CharSequence markup = (CharSequence) src;
        final StringWriter stripped = new StringWriter(markup.length());
        Reader filter = null;
        try {
          final Reader raw = new StringReader(markup.toString());
          filter = new HTMLStripCharFilter(CharReader.get(raw));
          IOUtils.copy(filter, stripped);
          return stripped.toString();
        } catch (IOException e) {
          // best effort only: if stripping fails, keep the original value
          return markup;
        } finally {
          IOUtils.closeQuietly(filter);
        }
      }
    };
  }
}

View File

@ -0,0 +1,81 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.core.SolrCore;
import java.util.Collections;
import java.util.Collection;
import java.util.List;
import java.util.SortedSet;
import java.util.Iterator;
/**
* Keeps only the last value of fields matching the specified
* conditions. Correct behavior assumes that the SolrInputFields being mutated
* are either single valued, or use an ordered Collection (ie: not a Set).
* <p>
* By default, this processor matches no fields.
* </p>
*
* <p>
* For example, in the configuration below, if a field named
* <code>primary_author</code> contained multiple values (ie:
* <code>"Adam Doe", "Bob Smith", "Carla Jones"</code>) then only the last
* value (ie: <code>"Carla Jones"</code>) will be kept
* </p>
*
* <pre class="prettyprint">
* &lt;processor class="solr.LastFieldValueUpdateProcessorFactory"&gt;
* &lt;str name="fieldName"&gt;primary_author&lt;/str&gt;
* &lt;/processor&gt;
* </pre>
*
* @see FirstFieldValueUpdateProcessorFactory
*/
public final class LastFieldValueUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {

  /**
   * Reduces the supplied values to a one-element list holding the last
   * value. Lists and SortedSets are asked for their tail directly; any
   * other collection is iterated to its end.
   *
   * @param values the current values of a selected field
   * @return a single-element list containing the last value
   */
  @Override
  public Collection<Object> pickSubset(Collection<Object> values) {
    if (values instanceof List) {
      // direct index lookup avoids walking the whole list
      final List<Object> ordered = (List<Object>) values;
      final Object tail = ordered.get(ordered.size() - 1);
      return Collections.singletonList(tail);
    }

    if (values instanceof SortedSet) {
      // a sorted set can hand back its tail element directly
      final Object tail = ((SortedSet<Object>) values).last();
      return Collections.singletonList(tail);
    }

    // no shortcut available: whatever the iterator yields last wins
    Object tail = null;
    final Iterator<Object> it = values.iterator();
    while (it.hasNext()) {
      tail = it.next();
    }
    return Collections.singletonList(tail);
  }

  /**
   * Returns {@link FieldMutatingUpdateProcessor#SELECT_NO_FIELDS} as the
   * default selector.
   */
  @Override
  public FieldMutatingUpdateProcessor.FieldNameSelector
    getDefaultSelector(final SolrCore core) {

    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
  }
}

View File

@ -0,0 +1,75 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.core.SolrCore;
import java.util.Collections;
import java.util.Collection;
import java.util.Iterator;
/**
* An update processor that keeps only the maximum value from any selected
* fields where multiple values are found. Correct behavior assumes that all
* of the values in the SolrInputFields being mutated are mutually comparable;
* If this is not the case, then the full list of all values found will be
* used as is.
* <p>
* By default, this processor matches no fields.
* </p>
*
* <p>
* In the example configuration below, if a document contains multiple integer
* values (ie: <code>64, 128, 1024</code>) in the field
* <code>largestFileSize</code> then only the biggest value
* (ie: <code>1024</code>) will be kept in that field.
* </p>
*
* <pre class="prettyprint">
* &lt;processor class="solr.MaxFieldValueUpdateProcessorFactory"&gt;
* &lt;str name="fieldName"&gt;largestFileSize&lt;/str&gt;
* &lt;/processor&gt;
* </pre>
*
* @see MinFieldValueUpdateProcessorFactory
* @see Collections#max
*/
public final class MaxFieldValueUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {

  /**
   * Reduces the supplied values to a one-element list holding their
   * maximum, as determined by {@link Collections#max}. If the values are
   * not mutually comparable the original collection is returned untouched.
   *
   * @param values the current values of a selected field
   * @return a single-element list with the maximum, or <code>values</code>
   *         itself when a ClassCastException is raised while comparing
   */
  @Override
  @SuppressWarnings("unchecked")
  public Collection<Object> pickSubset(Collection<Object> values) {
    try {
      final Object largest = Collections.max((Collection)values);
      return Collections.singletonList(largest);
    } catch (ClassCastException notComparable) {
      // values can't be compared with each other: keep them all as is
      return values;
    }
  }

  /**
   * Returns {@link FieldMutatingUpdateProcessor#SELECT_NO_FIELDS} as the
   * default selector.
   */
  @Override
  public FieldMutatingUpdateProcessor.FieldNameSelector
    getDefaultSelector(final SolrCore core) {

    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
  }
}

View File

@ -0,0 +1,75 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.core.SolrCore;
import java.util.Collections;
import java.util.Collection;
import java.util.Iterator;
/**
* An update processor that keeps only the minimum value from any selected
* fields where multiple values are found. Correct behavior assumes that all
* of the values in the SolrInputFields being mutated are mutually comparable;
* If this is not the case, then the full list of all values found will be
* used as is.
* <p>
* By default, this processor matches no fields.
* </p>
*
* <p>
* In the example configuration below, if a document contains multiple integer
* values (ie: <code>64, 128, 1024</code>) in the field
* <code>smallestFileSize</code> then only the smallest value
* (ie: <code>64</code>) will be kept in that field.
* </p>
*
* <pre class="prettyprint">
* &lt;processor class="solr.MinFieldValueUpdateProcessorFactory"&gt;
* &lt;str name="fieldName"&gt;smallestFileSize&lt;/str&gt;
* &lt;/processor&gt;
* </pre>
*
* @see MaxFieldValueUpdateProcessorFactory
* @see Collections#min
*/
public final class MinFieldValueUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {

  /**
   * Reduces the supplied values to a one-element list holding their
   * minimum, as determined by {@link Collections#min}. If the values are
   * not mutually comparable the original collection is returned untouched.
   *
   * @param values the current values of a selected field
   * @return a single-element list with the minimum, or <code>values</code>
   *         itself when a ClassCastException is raised while comparing
   */
  @Override
  @SuppressWarnings("unchecked")
  public Collection<Object> pickSubset(Collection<Object> values) {
    try {
      final Object smallest = Collections.min((Collection)values);
      return Collections.singletonList(smallest);
    } catch (ClassCastException notComparable) {
      // values can't be compared with each other: keep them all as is
      return values;
    }
  }

  /**
   * Returns {@link FieldMutatingUpdateProcessor#SELECT_NO_FIELDS} as the
   * default selector.
   */
  @Override
  public FieldMutatingUpdateProcessor.FieldNameSelector
    getDefaultSelector(final SolrCore core) {

    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
  }
}

View File

@ -0,0 +1,121 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.core.SolrCore;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import java.util.regex.PatternSyntaxException;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* An update processor that applies a configured regex to any
* CharSequence values found in the selected fields, and replaces
* any matches with the configured replacement string.
* <p>
* By default this processor applies itself to no fields.
* </p>
*
* <p>
* For example, with the configuration listed below, any sequence of multiple
* whitespace characters found in values for field named <code>title</code>
* or <code>content</code> will be replaced by a single space character.
* </p>
*
* <pre class="prettyprint">
* &lt;processor class="solr.RegexReplaceProcessorFactory"&gt;
* &lt;str name="fieldName"&gt;content&lt;/str&gt;
* &lt;str name="fieldName"&gt;title&lt;/str&gt;
* &lt;str name="pattern"&gt;\s+&lt;/str&gt;
* &lt;str name="replacement"&gt; &lt;/str&gt;
* &lt;/processor&gt;
* </pre>
*
* @see java.util.regex.Pattern
*/
public final class RegexReplaceProcessorFactory extends FieldMutatingUpdateProcessorFactory {

  private static final Logger log = LoggerFactory.getLogger(RegexReplaceProcessorFactory.class);

  private static final String REPLACEMENT_PARAM = "replacement";
  private static final String PATTERN_PARAM = "pattern";

  /** Compiled form of the required <code>pattern</code> init param. */
  private Pattern pattern;
  /** The required <code>replacement</code> init param, quoted so it is used literally. */
  private String replacement;

  /**
   * Reads and removes the required <code>pattern</code> and
   * <code>replacement</code> init params, then hands the remaining args to
   * the superclass.
   *
   * @param args the init args of this factory
   * @throws SolrException if either required param is missing, or if the
   *         pattern is not a valid regular expression
   */
  @SuppressWarnings("unchecked")
  @Override
  public void init(NamedList args) {

    // Null-check the removed value BEFORE converting it to a String:
    // calling toString() first would fail with a NullPointerException on a
    // missing param and the descriptive error below could never be thrown.
    final Object patternParam = args.remove(PATTERN_PARAM);
    if (patternParam == null) {
      throw new SolrException(ErrorCode.SERVER_ERROR,
                              "Missing required init parameter: " + PATTERN_PARAM);
    }
    try {
      pattern = Pattern.compile(patternParam.toString());
    } catch (PatternSyntaxException e) {
      throw new SolrException(ErrorCode.SERVER_ERROR,
                              "Invalid regex: " + patternParam, e);
    }

    final Object replacementParam = args.remove(REPLACEMENT_PARAM);
    if (replacementParam == null) {
      throw new SolrException(ErrorCode.SERVER_ERROR,
                              "Missing required init parameter: " + REPLACEMENT_PARAM);
    }
    // quoteReplacement makes '$' and '\' in the configured value literal
    replacement = Matcher.quoteReplacement(replacementParam.toString());

    super.init(args);
  }

  /**
   * @see FieldMutatingUpdateProcessor#SELECT_NO_FIELDS
   */
  @Override
  protected FieldMutatingUpdateProcessor.FieldNameSelector
    getDefaultSelector(final SolrCore core) {

    return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
  }

  /**
   * Creates a processor that replaces every match of the configured pattern
   * in CharSequence values with the configured replacement. Values that are
   * not CharSequences are returned unchanged.
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest request,
                                            SolrQueryResponse response,
                                            UpdateRequestProcessor next) {
    return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
      protected Object mutateValue(final Object src) {
        if (src instanceof CharSequence) {
          CharSequence txt = (CharSequence)src;
          return pattern.matcher(txt).replaceAll(replacement);
        }
        return src;
      }
    };
  }
}

View File

@ -0,0 +1,70 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
/**
* Removes any values found which are CharSequence with a length of 0.
* (ie: empty strings)
* <p>
* By default this processor applies itself to all fields.
* </p>
*
* <p>
* For example, with the configuration listed below, blank strings will be
* removed from all fields except those whose name ends with
* "<code>_literal</code>".
* </p>
*
* <pre class="prettyprint">
* &lt;processor class="solr.RemoveBlankFieldUpdateProcessorFactory"&gt;
* &lt;lst name="exclude"&gt;
* &lt;str name="fieldRegex"&gt;.*_literal&lt;/str&gt;
* &lt;/lst&gt;
* &lt;/processor&gt;
* </pre>
*
*/
public final class RemoveBlankFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {

  /**
   * Delegates straight to the superclass; this factory reads no init args
   * of its own.
   */
  @SuppressWarnings("unchecked")
  @Override
  public void init(NamedList args) {
    super.init(args);
  }

  /**
   * Creates a processor that flags zero-length CharSequence values for
   * deletion by returning <code>DELETE_VALUE_SINGLETON</code>; every other
   * value is returned unchanged.
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
                                            SolrQueryResponse rsp,
                                            UpdateRequestProcessor next) {
    return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
      protected Object mutateValue(final Object src) {
        final boolean emptyText =
          src instanceof CharSequence && ((CharSequence) src).length() == 0;
        if (!emptyText) {
          return src;
        }
        return DELETE_VALUE_SINGLETON;
      }
    };
  }
}

View File

@ -0,0 +1,68 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
/**
* Trims leading and trailing whitespace from any CharSequence values
* found in fields matching the specified conditions and returns the
* resulting String.
* <p>
* By default this processor matches all fields
* </p>
*
* <p>For example, with the configuration listed below all String field values
* will have leading and trailing spaces removed except for fields whose
* name ends with "<code>_literal</code>".
* </p>
* <pre class="prettyprint">
* &lt;processor class="solr.TrimFieldUpdateProcessorFactory"&gt;
* &lt;lst name="exclude"&gt;
* &lt;str name="fieldRegex"&gt;.*_literal&lt;/str&gt;
* &lt;/lst&gt;
* &lt;/processor&gt;
* </pre>
*/
public final class TrimFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {

  /**
   * Delegates straight to the superclass; this factory reads no init args
   * of its own.
   */
  @SuppressWarnings("unchecked")
  @Override
  public void init(NamedList args) {
    super.init(args);
  }

  /**
   * Creates a processor that replaces every CharSequence value with its
   * trimmed String form; every other value is returned unchanged.
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
                                            SolrQueryResponse rsp,
                                            UpdateRequestProcessor next) {
    return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
      protected Object mutateValue(final Object src) {
        if (!(src instanceof CharSequence)) {
          return src;
        }
        final String text = src.toString();
        return text.trim();
      }
    };
  }
}

View File

@ -0,0 +1,235 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Test Config that enumerates many different update processor chain
configurations.
-->
<config>
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
<updateRequestProcessorChain name="comprehensive">
<processor class="solr.FieldLengthUpdateProcessorFactory">
<arr name="typeClass">
<str>solr.TrieIntField</str>
<str>solr.TrieLongField</str>
</arr>
</processor>
<processor class="solr.MinFieldValueUpdateProcessorFactory">
<str name="fieldName">min_foo_l</str>
</processor>
<processor class="solr.MaxFieldValueUpdateProcessorFactory">
<str name="fieldName">max_foo_l</str>
</processor>
<processor class="solr.ConcatFieldUpdateProcessorFactory">
<str name="delimiter">; </str>
<lst name="exclude">
<str name="fieldName">primary_author_s1</str>
</lst>
</processor>
<processor class="solr.FirstFieldValueUpdateProcessorFactory">
<str name="fieldName">primary_author_s1</str>
<str name="fieldName">first_foo_l</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="trim-all">
<!-- no specific src field configs, so all fields should get trimmed -->
<processor class="solr.TrimFieldUpdateProcessorFactory">
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="trim-field">
<processor class="solr.TrimFieldUpdateProcessorFactory">
<str name="fieldName">foo_t</str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="trim-most">
<!-- all fields except the exclusions should be trimmed -->
<processor class="solr.TrimFieldUpdateProcessorFactory">
<lst name="exclude">
<str name="fieldName">foo_t</str>
</lst>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="trim-many">
<processor class="solr.TrimFieldUpdateProcessorFactory">
<str name="fieldRegex">foo.*</str>
<str name="fieldRegex">bar.*</str>
<lst name="exclude">
<str name="fieldRegex">.*HOSS.*</str>
</lst>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="trim-few">
<processor class="solr.TrimFieldUpdateProcessorFactory">
<str name="fieldRegex">foo.*</str>
<str name="fieldRegex">bar.*</str>
<!-- each set of exclusions is checked independently -->
<lst name="exclude">
<str name="typeClass">solr.DateField</str>
</lst>
<lst name="exclude">
<str name="fieldRegex">.*HOSS.*</str>
</lst>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="trim-some">
<processor class="solr.TrimFieldUpdateProcessorFactory">
<str name="fieldRegex">foo.*</str>
<str name="fieldRegex">bar.*</str>
<!-- only excluded if it matches all in set -->
<lst name="exclude">
<str name="typeClass">solr.DateField</str>
<str name="fieldRegex">.*HOSS.*</str>
</lst>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="trim-fields">
<processor class="solr.TrimFieldUpdateProcessorFactory">
<str name="fieldName">name</str>
<str name="fieldName">foo_t</str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="trim-fields-arr">
<processor class="solr.TrimFieldUpdateProcessorFactory">
<arr name="fieldName">
<str>name</str>
<str>foo_t</str>
</arr>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="trim-field-regexes">
<processor class="solr.TrimFieldUpdateProcessorFactory">
<str name="fieldRegex">foo.*</str>
<str name="fieldRegex">bar.*_s</str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="trim-types">
<processor class="solr.TrimFieldUpdateProcessorFactory">
<str name="typeName">nametext</str>
<str name="typeName">text_sw</str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="trim-classes">
<processor class="solr.TrimFieldUpdateProcessorFactory">
<str name="typeClass">solr.DateField</str>
<str name="typeClass">solr.StrField</str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="trim-multi">
<processor class="solr.TrimFieldUpdateProcessorFactory">
<str name="typeClass">solr.DateField</str>
<str name="typeClass">solr.StrField</str>
<arr name="fieldRegex">
<str>foo.*</str>
</arr>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="remove-all-blanks">
<processor class="solr.RemoveBlankFieldUpdateProcessorFactory">
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="length-none">
<processor class="solr.FieldLengthUpdateProcessorFactory">
<!-- by default, the processor doesn't touch anything -->
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="length-some">
<processor class="solr.FieldLengthUpdateProcessorFactory">
<arr name="fieldRegex">
<str>foo.*</str>
<str>yak.*</str>
</arr>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="concat-defaults">
<processor class="solr.ConcatFieldUpdateProcessorFactory">
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="concat-field">
<processor class="solr.ConcatFieldUpdateProcessorFactory">
<str name="fieldName">foo_s</str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="concat-type-delim">
<processor class="solr.ConcatFieldUpdateProcessorFactory">
<str name="typeName">string</str>
<str name="delimiter">; </str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="first-value">
<processor class="solr.FirstFieldValueUpdateProcessorFactory">
<str name="fieldName">foo_s</str>
<str name="fieldName">bar_s</str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="last-value">
<processor class="solr.LastFieldValueUpdateProcessorFactory">
<str name="fieldName">foo_s</str>
<str name="fieldName">bar_s</str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="min-value">
<processor class="solr.MinFieldValueUpdateProcessorFactory">
<str name="fieldName">foo_i</str>
<str name="fieldName">foo_s</str>
<str name="fieldName">bar_s</str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="max-value">
<processor class="solr.MaxFieldValueUpdateProcessorFactory">
<str name="fieldName">foo_i</str>
<str name="fieldName">foo_s</str>
<str name="fieldName">bar_s</str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="html-strip">
<processor class="solr.HTMLStripFieldUpdateProcessorFactory">
<str name="fieldName">html_s</str>
</processor>
<processor class="solr.TrimFieldUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="regex-replace">
<processor class="solr.RegexReplaceProcessorFactory">
<str name="fieldName">content</str>
<str name="fieldName">title</str>
<str name="pattern">\s+</str>
<str name="replacement">X</str>
</processor>
</updateRequestProcessorChain>
</config>

View File

@ -0,0 +1,674 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.TreeSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Arrays;
import java.io.IOException;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
/**
* Tests the basics of configuring FieldMutatingUpdateProcessors
* (mainly via TrimFieldUpdateProcessor) and the logic of other various
* subclasses.
*/
public class FieldMutatingUpdateProcessorTest extends SolrTestCaseJ4 {
/**
 * Initializes the test core with the update processor chain config and
 * the schema used by every test in this class.
 */
@BeforeClass
public static void beforeClass() throws Exception {
  final String solrConfig = "solrconfig-update-processor-chains.xml";
  final String schema = "schema12.xml";
  initCore(solrConfig, schema);
}
/**
 * Runs a document through the "comprehensive" chain, commits it, and
 * checks the indexed result of the combined processors.
 */
public void testComprehensive() throws Exception {

  final String countMe = "how long is this string?";
  final int count = countMe.length();

  processAdd("comprehensive",
             doc(f("id", "1111"),
                 f("primary_author_s1", "XXXX", "Adam", "Sam"),
                 f("all_authors_s1", "XXXX", "Adam", "Sam"),
                 f("foo_is", countMe, Integer.valueOf(42)),
                 f("first_foo_l", countMe, Integer.valueOf(-34)),
                 f("max_foo_l", countMe, Integer.valueOf(-34)),
                 f("min_foo_l", countMe, Integer.valueOf(-34))));

  assertU(commit());

  // the string value in each numeric field is expected to show up as its
  // length before the first/min/max/concat processors pick their values
  assertQ(req("id:1111"),
          "//str[@name='primary_author_s1'][.='XXXX']",
          "//str[@name='all_authors_s1'][.='XXXX; Adam; Sam']",
          "//arr[@name='foo_is']/int[1][.='"+count+"']",
          "//arr[@name='foo_is']/int[2][.='42']",
          "//long[@name='max_foo_l'][.='"+count+"']",
          "//long[@name='first_foo_l'][.='"+count+"']",
          "//long[@name='min_foo_l'][.='-34']");
}
/**
 * The "trim-all" chain has no field selection config, so every
 * CharSequence value should come back trimmed while other values and the
 * field boost are left alone.
 */
public void testTrimAll() throws Exception {
  final SolrInputDocument d =
    processAdd("trim-all",
               doc(f("id", "1111"),
                   f("name", " Hoss ", new StringBuilder(" Man")),
                   f("foo_t", " some text ", "other Text\t"),
                   f("foo_d", Integer.valueOf(42)),
                   field("foo_s", 5.0F, " string ")));

  assertNotNull(d);

  // plain string values, including the StringBuilder one
  assertEquals("string", d.getFieldValue("foo_s"));
  assertEquals(Arrays.asList("some text","other Text"),
               d.getFieldValues("foo_t"));
  assertEquals(Arrays.asList("Hoss","Man"),
               d.getFieldValues("name"));

  // non-string values and boosts must survive untouched
  assertEquals("processor borked non string value",
               Integer.valueOf(42), d.getFieldValue("foo_d"));
  assertEquals("wrong boost",
               5.0F, d.getField("foo_s").getBoost(), 0.0F);
}
/**
 * Checks the "trim-fields" and "trim-fields-arr" chains, which both name
 * the fields <code>name</code> and <code>foo_t</code>: those two are
 * trimmed and <code>foo_s</code> is left as is.
 */
public void testTrimFields() throws Exception {
  final String[] chains = { "trim-fields", "trim-fields-arr" };
  for (final String chain : chains) {
    final SolrInputDocument d =
      processAdd(chain,
                 doc(f("id", "1111"),
                     f("name", " Hoss ", " Man"),
                     f("foo_t", " some text ", "other Text\t"),
                     f("foo_s", " string ")));

    assertNotNull(d);

    // not selected by either chain
    assertEquals(" string ", d.getFieldValue("foo_s"));
    // selected by both chains
    assertEquals(Arrays.asList("some text","other Text"),
                 d.getFieldValues("foo_t"));
    assertEquals(Arrays.asList("Hoss","Man"),
                 d.getFieldValues("name"));
  }
}
/**
 * Checks the "trim-field" chain, which names only <code>foo_t</code>:
 * that field is trimmed and the others are left as is.
 */
public void testTrimField() throws Exception {
  final SolrInputDocument d =
    processAdd("trim-field",
               doc(f("id", "1111"),
                   f("name", " Hoss ", " Man"),
                   f("foo_t", " some text ", "other Text\t"),
                   f("foo_s", " string ")));

  assertNotNull(d);

  // not selected
  assertEquals(" string ", d.getFieldValue("foo_s"));
  // the one selected field
  assertEquals(Arrays.asList("some text","other Text"),
               d.getFieldValues("foo_t"));
  // not selected
  assertEquals(Arrays.asList(" Hoss "," Man"),
               d.getFieldValues("name"));
}
/**
 * Checks the "trim-field-regexes" chain, configured with the field name
 * regexes <code>foo.*</code> and <code>bar.*_s</code>.
 */
public void testTrimRegex() throws Exception {
  final SolrInputDocument d =
    processAdd("trim-field-regexes",
               doc(f("id", "1111"),
                   f("foo_t", " string1 "),
                   f("foozat_s", " string2 "),
                   f("bar_t", " string3 "),
                   f("bar_s", " string4 ")));

  assertNotNull(d);

  // both match foo.*
  assertEquals("string1", d.getFieldValue("foo_t"));
  assertEquals("string2", d.getFieldValue("foozat_s"));
  // matches neither regex
  assertEquals(" string3 ", d.getFieldValue("bar_t"));
  // matches bar.*_s
  assertEquals("string4", d.getFieldValue("bar_s"));
}
/**
 * Checks the "trim-types" chain, which selects fields by the type names
 * <code>nametext</code> and <code>text_sw</code>.
 */
public void testTrimTypes() throws Exception {
  final SolrInputDocument d =
    processAdd("trim-types",
               doc(f("id", "1111"),
                   f("foo_sw", " string0 "),
                   f("name", " string1 "),
                   f("title", " string2 "),
                   f("bar_t", " string3 "),
                   f("bar_s", " string4 ")));

  assertNotNull(d);

  // expected to be trimmed
  assertEquals("string0", d.getFieldValue("foo_sw"));
  assertEquals("string1", d.getFieldValue("name"));
  assertEquals("string2", d.getFieldValue("title"));
  // expected to be left alone
  assertEquals(" string3 ", d.getFieldValue("bar_t"));
  assertEquals(" string4 ", d.getFieldValue("bar_s"));
}
/**
 * Runs a document through the "trim-classes" chain and expects foo_s,
 * bar_dt and bar_pdt to be trimmed while foo_t is left as supplied.
 */
public void testTrimClasses() throws Exception {
  final SolrInputDocument input =
    doc(f("id", "1111"),
        f("foo_t", " string1 "),
        f("foo_s", " string2 "),
        f("bar_dt", " string3 "),
        f("bar_pdt", " string4 "));
  final SolrInputDocument result = processAdd("trim-classes", input);
  assertNotNull(result);

  // foo_t is the only field expected to keep its whitespace
  assertEquals(" string1 ", result.getFieldValue("foo_t"));
  assertEquals("string2", result.getFieldValue("foo_s"));
  assertEquals("string3", result.getFieldValue("bar_dt"));
  assertEquals("string4", result.getFieldValue("bar_pdt"));
}
/**
 * Runs a document through the "trim-multi" chain and expects foo_s and
 * foo_pdt to be trimmed while foo_t and bar_dt are left as supplied.
 */
public void testTrimMultipleRules() throws Exception {
  final SolrInputDocument input =
    doc(f("id", "1111"),
        f("foo_t", " string1 "),
        f("foo_s", " string2 "),
        f("bar_dt", " string3 "),
        f("foo_pdt", " string4 "));
  final SolrInputDocument result = processAdd("trim-multi", input);
  assertNotNull(result);

  assertEquals(" string1 ", result.getFieldValue("foo_t"));
  assertEquals("string2", result.getFieldValue("foo_s"));
  assertEquals(" string3 ", result.getFieldValue("bar_dt"));
  assertEquals("string4", result.getFieldValue("foo_pdt"));
}
/**
 * Exercises four chains in turn ("trim-most", "trim-many", "trim-few" and
 * "trim-some"), each with a freshly built document, and checks for every
 * chain which fields come back trimmed and which keep their whitespace.
 */
public void testTrimExclusions() throws Exception {
  // --- "trim-most": everything but foo_t is expected to be trimmed
  final SolrInputDocument mostResult =
    processAdd("trim-most",
               doc(f("id", "1111"),
                   f("foo_t", " string1 "),
                   f("foo_s", " string2 "),
                   f("bar_dt", " string3 "),
                   f("foo_pdt", " string4 ")));
  assertNotNull(mostResult);
  assertEquals(" string1 ", mostResult.getFieldValue("foo_t"));
  assertEquals("string2", mostResult.getFieldValue("foo_s"));
  assertEquals("string3", mostResult.getFieldValue("bar_dt"));
  assertEquals("string4", mostResult.getFieldValue("foo_pdt"));

  // --- "trim-many": the two HOSS fields are expected to stay untouched
  final SolrInputDocument manyResult =
    processAdd("trim-many",
               doc(f("id", "1111"),
                   f("foo_t", " string1 "),
                   f("foo_s", " string2 "),
                   f("bar_dt", " string3 "),
                   f("bar_HOSS_s", " string4 "),
                   f("foo_pdt", " string5 "),
                   f("foo_HOSS_pdt", " string6 ")));
  assertNotNull(manyResult);
  assertEquals("string1", manyResult.getFieldValue("foo_t"));
  assertEquals("string2", manyResult.getFieldValue("foo_s"));
  assertEquals("string3", manyResult.getFieldValue("bar_dt"));
  assertEquals(" string4 ", manyResult.getFieldValue("bar_HOSS_s"));
  assertEquals("string5", manyResult.getFieldValue("foo_pdt"));
  assertEquals(" string6 ", manyResult.getFieldValue("foo_HOSS_pdt"));

  // --- "trim-few": only foo_t and foo_s are expected to be trimmed
  final SolrInputDocument fewResult =
    processAdd("trim-few",
               doc(f("id", "1111"),
                   f("foo_t", " string1 "),
                   f("foo_s", " string2 "),
                   f("bar_dt", " string3 "),
                   f("bar_HOSS_s", " string4 "),
                   f("foo_pdt", " string5 "),
                   f("foo_HOSS_pdt", " string6 ")));
  assertNotNull(fewResult);
  assertEquals("string1", fewResult.getFieldValue("foo_t"));
  assertEquals("string2", fewResult.getFieldValue("foo_s"));
  assertEquals(" string3 ", fewResult.getFieldValue("bar_dt"));
  assertEquals(" string4 ", fewResult.getFieldValue("bar_HOSS_s"));
  assertEquals(" string5 ", fewResult.getFieldValue("foo_pdt"));
  assertEquals(" string6 ", fewResult.getFieldValue("foo_HOSS_pdt"));

  // --- "trim-some": only foo_HOSS_pdt is expected to stay untouched
  final SolrInputDocument someResult =
    processAdd("trim-some",
               doc(f("id", "1111"),
                   f("foo_t", " string1 "),
                   f("foo_s", " string2 "),
                   f("bar_dt", " string3 "),
                   f("bar_HOSS_s", " string4 "),
                   f("foo_pdt", " string5 "),
                   f("foo_HOSS_pdt", " string6 ")));
  assertNotNull(someResult);
  assertEquals("string1", someResult.getFieldValue("foo_t"));
  assertEquals("string2", someResult.getFieldValue("foo_s"));
  assertEquals("string3", someResult.getFieldValue("bar_dt"));
  assertEquals("string4", someResult.getFieldValue("bar_HOSS_s"));
  assertEquals("string5", someResult.getFieldValue("foo_pdt"));
  assertEquals(" string6 ", someResult.getFieldValue("foo_HOSS_pdt"));
}
/**
 * Runs a document through the "remove-all-blanks" chain and expects empty
 * string values to be dropped: multi-valued fields keep only their
 * non-empty values, a field whose only value was empty is gone from the
 * document, and the Integer value of foo_d is returned unchanged.
 */
public void testRemoveBlanks() throws Exception {
  final SolrInputDocument input =
    doc(f("id", "1111"),
        f("foo_s", "string1", ""),
        f("bar_dt", "string2", "", "string3"),
        f("yak_t", ""),
        f("foo_d", Integer.valueOf(42)));
  final SolrInputDocument result = processAdd("remove-all-blanks", input);
  assertNotNull(result);

  assertEquals(Arrays.asList("string1"),
               result.getFieldValues("foo_s"));
  assertEquals(Arrays.asList("string2","string3"),
               result.getFieldValues("bar_dt"));
  // yak_t only had a blank value, so the whole field should be absent
  assertFalse("shouldn't be any values for yak_t",
              result.containsKey("yak_t"));
  assertEquals("processor borked non string value",
               Integer.valueOf(42), result.getFieldValue("foo_d"));
}
/**
 * Checks two chains with identical input. "length-none" is expected to
 * leave every value as supplied; "length-some" is expected to replace the
 * values of foo_s and yak_t by Integer lengths (7, 9 and 0) while bar_dt
 * and the Integer value of foo_d stay as supplied.
 */
public void testStrLength() throws Exception {
  // --- "length-none": nothing is expected to change
  final SolrInputDocument noneResult =
    processAdd("length-none",
               doc(f("id", "1111"),
                   f("foo_s", "string1", "string222"),
                   f("bar_dt", "string3"),
                   f("yak_t", ""),
                   f("foo_d", Integer.valueOf(42))));
  assertNotNull(noneResult);
  assertEquals(Arrays.asList("string1","string222"),
               noneResult.getFieldValues("foo_s"));
  assertEquals("string3", noneResult.getFieldValue("bar_dt"));
  assertEquals("", noneResult.getFieldValue("yak_t"));
  assertEquals("processor borked non string value",
               Integer.valueOf(42), noneResult.getFieldValue("foo_d"));

  // --- "length-some": foo_s and yak_t are expected to become lengths
  final SolrInputDocument someResult =
    processAdd("length-some",
               doc(f("id", "1111"),
                   f("foo_s", "string1", "string222"),
                   f("bar_dt", "string3"),
                   f("yak_t", ""),
                   f("foo_d", Integer.valueOf(42))));
  assertNotNull(someResult);
  assertEquals(Arrays.asList(Integer.valueOf(7), Integer.valueOf(9)),
               someResult.getFieldValues("foo_s"));
  assertEquals("string3", someResult.getFieldValue("bar_dt"));
  assertEquals(Integer.valueOf(0), someResult.getFieldValue("yak_t"));
  assertEquals("processor borked non string value",
               Integer.valueOf(42), someResult.getFieldValue("foo_d"));
}
/**
 * Runs a document through the "regex-replace" chain and expects every
 * whitespace run in the content and title values to come back as a
 * single "X".
 */
public void testRegexReplace() throws Exception {
  final SolrInputDocument input =
    doc(f("id", "doc1"),
        f("content", "This is a text\t with a lot\n of whitespace"),
        f("title", "This\ttitle has a lot of spaces"));
  final SolrInputDocument result = processAdd("regex-replace", input);
  assertNotNull(result);

  assertEquals("ThisXisXaXtextXwithXaXlotXofXwhitespace",
               result.getFieldValue("content"));
  assertEquals("ThisXtitleXhasXaXlotXofXspaces",
               result.getFieldValue("title"));
}
/**
 * Runs a document through the "first-value" chain and expects foo_s to be
 * reduced to its first value, the single-valued bar_s to be unchanged, and
 * yak_t to keep both of its values.
 */
public void testFirstValue() throws Exception {
  final SolrInputDocument input =
    doc(f("id", "1111"),
        f("foo_s", "string1", "string222"),
        f("bar_s", "string3"),
        f("yak_t", "string4", "string5"));
  final SolrInputDocument result = processAdd("first-value", input);
  assertNotNull(result);

  assertEquals(Arrays.asList("string1"),
               result.getFieldValues("foo_s"));
  assertEquals(Arrays.asList("string3"),
               result.getFieldValues("bar_s"));
  // yak_t is expected to be left alone by this chain
  assertEquals(Arrays.asList("string4", "string5"),
               result.getFieldValues("yak_t"));
}
/**
 * Checks the "last-value" chain. First with ordinary fields: foo_s is
 * expected to be reduced to its last value while bar_s and yak_t are
 * unchanged. Then with foo_s backed by three different collection types
 * (a TreeSet, a List and a LinkedHashSet); in every case the value "last"
 * is expected to be the one that survives.
 */
public void testLastValue() throws Exception {
  // basics
  final SolrInputDocument basicResult =
    processAdd("last-value",
               doc(f("id", "1111"),
                   f("foo_s", "string1", "string222"),
                   f("bar_s", "string3"),
                   f("yak_t", "string4", "string5")));
  assertNotNull(basicResult);
  assertEquals(Arrays.asList("string222"),
               basicResult.getFieldValues("foo_s"));
  assertEquals(Arrays.asList("string3"),
               basicResult.getFieldValues("bar_s"));
  assertEquals(Arrays.asList("string4", "string5"),
               basicResult.getFieldValues("yak_t"));

  // test optimizations (and force test of defaults)

  // a value collection that is definitely a SortedSet; the natural
  // String ordering places "last" at the end regardless of insert order
  final SolrInputField sortedSetField = new SolrInputField("foo_s");
  sortedSetField.setValue(new TreeSet<String>
                          (Arrays.asList("ggg", "first", "last", "hhh")), 1.2F);
  final SolrInputDocument sortedSetResult =
    processAdd("last-value", doc(f("id", "1111"), sortedSetField));
  assertNotNull(sortedSetResult);
  assertEquals("last", sortedSetResult.getFieldValue("foo_s"));

  // a value collection that is definitely a List
  final SolrInputField listField = new SolrInputField("foo_s");
  listField.setValue(Arrays.asList("first", "ggg", "hhh", "last"), 1.2F);
  final SolrInputDocument listResult =
    processAdd("last-value", doc(f("id", "1111"), listField));
  assertNotNull(listResult);
  assertEquals("last", listResult.getFieldValue("foo_s"));

  // a value collection that is definitely neither a List nor a SortedSet
  // (ie: get default behavior of Collection using iterator)
  final SolrInputField linkedSetField = new SolrInputField("foo_s");
  linkedSetField.setValue(new LinkedHashSet<String>
                          (Arrays.asList("first", "ggg", "hhh", "last")), 1.2F);
  final SolrInputDocument linkedSetResult =
    processAdd("last-value", doc(f("id", "1111"), linkedSetField));
  assertNotNull(linkedSetResult);
  assertEquals("last", linkedSetResult.getFieldValue("foo_s"));
}
/**
 * Checks the "min-value" chain. With comparable values, foo_s and foo_i
 * are expected to be reduced to their smallest value while bar_s and
 * yak_t are unchanged. With a mix of String and Integer values in foo_s
 * the chain must not fail and the values are expected back as supplied.
 */
public void testMinValue() throws Exception {
  final SolrInputDocument comparableResult =
    processAdd("min-value",
               doc(f("id", "1111"),
                   f("foo_s", "zzz", "aaa", "bbb"),
                   f("foo_i", 42, 128, -3),
                   f("bar_s", "aaa"),
                   f("yak_t", "aaa", "bbb")));
  assertNotNull(comparableResult);
  assertEquals(Arrays.asList("aaa"),
               comparableResult.getFieldValues("foo_s"));
  assertEquals(Arrays.asList(-3),
               comparableResult.getFieldValues("foo_i"));
  assertEquals(Arrays.asList("aaa"),
               comparableResult.getFieldValues("bar_s"));
  assertEquals(Arrays.asList("aaa", "bbb"),
               comparableResult.getFieldValues("yak_t"));

  // uncomparable should not fail
  final SolrInputDocument mixedResult =
    processAdd("min-value",
               doc(f("id", "1111"),
                   f("foo_s", "zzz", Integer.valueOf(42), "bbb"),
                   f("bar_s", "aaa"),
                   f("yak_t", "aaa", "bbb")));
  assertNotNull(mixedResult);
  assertEquals(Arrays.asList("zzz", Integer.valueOf(42), "bbb"),
               mixedResult.getFieldValues("foo_s"));
  assertEquals(Arrays.asList("aaa"),
               mixedResult.getFieldValues("bar_s"));
  assertEquals(Arrays.asList("aaa", "bbb"),
               mixedResult.getFieldValues("yak_t"));
}
/**
 * Checks the "max-value" chain. With comparable values, foo_s and foo_i
 * are expected to be reduced to their largest value while bar_s and
 * yak_t are unchanged. With a mix of String and Integer values in foo_s
 * the chain must not fail and the values are expected back as supplied.
 */
public void testMaxValue() throws Exception {
  final SolrInputDocument comparableResult =
    processAdd("max-value",
               doc(f("id", "1111"),
                   f("foo_s", "zzz", "aaa", "bbb"),
                   f("foo_i", 42, 128, -3),
                   f("bar_s", "aaa"),
                   f("yak_t", "aaa", "bbb")));
  assertNotNull(comparableResult);
  assertEquals(Arrays.asList("zzz"),
               comparableResult.getFieldValues("foo_s"));
  assertEquals(Arrays.asList(128),
               comparableResult.getFieldValues("foo_i"));
  assertEquals(Arrays.asList("aaa"),
               comparableResult.getFieldValues("bar_s"));
  assertEquals(Arrays.asList("aaa", "bbb"),
               comparableResult.getFieldValues("yak_t"));

  // uncomparable should not fail
  final SolrInputDocument mixedResult =
    processAdd("max-value",
               doc(f("id", "1111"),
                   f("foo_s", "zzz", Integer.valueOf(42), "bbb"),
                   f("bar_s", "aaa"),
                   f("yak_t", "aaa", "bbb")));
  assertNotNull(mixedResult);
  assertEquals(Arrays.asList("zzz", Integer.valueOf(42), "bbb"),
               mixedResult.getFieldValues("foo_s"));
  assertEquals(Arrays.asList("aaa"),
               mixedResult.getFieldValues("bar_s"));
  assertEquals(Arrays.asList("aaa", "bbb"),
               mixedResult.getFieldValues("yak_t"));
}
/**
 * Runs a document through the "html-strip" chain and expects markup to be
 * removed and the entity decoded in the values of html_s, while the same
 * markup in bar_s is returned exactly as supplied.
 */
public void testHtmlStrip() throws Exception {
  final SolrInputDocument input =
    doc(f("id", "1111"),
        f("html_s", "<body>hi &amp; bye", "aaa", "bbb"),
        f("bar_s", "<body>hi &amp; bye"));
  final SolrInputDocument result = processAdd("html-strip", input);
  assertNotNull(result);

  assertEquals(Arrays.asList("hi & bye", "aaa", "bbb"),
               result.getFieldValues("html_s"));
  // bar_s is expected to be left alone by this chain
  assertEquals("<body>hi &amp; bye", result.getFieldValue("bar_s"));
}
/**
 * Runs a document through the "concat-defaults" chain and expects the
 * values of id and foo_s1 to be joined with ", " into a single value,
 * while attr_foo, bar_dt and bar_HOSS_s keep their separate values and
 * the Integer value of foo_d is returned unchanged.
 */
public void testConcatDefaults() throws Exception {
  final SolrInputDocument input =
    doc(f("id", "1111", "222"),
        f("attr_foo", "string1", "string2"),
        f("foo_s1", "string3", "string4"),
        f("bar_dt", "string5", "string6"),
        f("bar_HOSS_s", "string7", "string8"),
        f("foo_d", Integer.valueOf(42)));
  final SolrInputDocument result = processAdd("concat-defaults", input);
  assertNotNull(result);

  assertEquals("1111, 222", result.getFieldValue("id"));
  assertEquals(Arrays.asList("string1","string2"),
               result.getFieldValues("attr_foo"));
  assertEquals("string3, string4", result.getFieldValue("foo_s1"));
  assertEquals(Arrays.asList("string5","string6"),
               result.getFieldValues("bar_dt"));
  assertEquals(Arrays.asList("string7","string8"),
               result.getFieldValues("bar_HOSS_s"));
  assertEquals("processor borked non string value",
               Integer.valueOf(42), result.getFieldValue("foo_d"));
}
/**
 * Runs the shared delimiter check against the "concat-field" chain,
 * expecting ", " between the joined values.
 */
public void testConcatExplicit() throws Exception {
  final String chainName = "concat-field";
  final String expectedDelim = ", ";
  doSimpleDelimTest(chainName, expectedDelim);
}
/**
 * Runs the shared delimiter check against the "concat-type-delim" chain,
 * expecting "; " between the joined values.
 */
public void testConcatExplicitWithDelim() throws Exception {
  final String chainName = "concat-type-delim";
  final String expectedDelim = "; ";
  doSimpleDelimTest(chainName, expectedDelim);
}
/**
 * Shared check for the concat tests: runs a fixed document through the
 * given chain and expects the two values of foo_s to be joined with the
 * given delimiter, with foo_t, the Integer value of foo_d and the boost
 * of foo_s all unchanged.
 *
 * @param chain name of the update processor chain to run
 * @param delim delimiter expected between the joined values of foo_s
 */
private void doSimpleDelimTest(final String chain, final String delim)
  throws Exception {
  final SolrInputDocument input =
    doc(f("id", "1111"),
        f("foo_t", "string1", "string2"),
        f("foo_d", Integer.valueOf(42)),
        field("foo_s", 3.0F, "string3", "string4"));
  final SolrInputDocument result = processAdd(chain, input);
  assertNotNull(result);

  // foo_t is expected to keep its two separate values
  assertEquals(Arrays.asList("string1","string2"),
               result.getFieldValues("foo_t"));

  final String expectedJoined = "string3" + delim + "string4";
  assertEquals(expectedJoined, result.getFieldValue("foo_s"));

  // slightly more interesting
  assertEquals("processor borked non string value",
               Integer.valueOf(42), result.getFieldValue("foo_d"));
  assertEquals("wrong boost",
               3.0F, result.getField("foo_s").getBoost(), 0.0F);
}
/**
 * Convenience method for assembling a SolrInputDocument: every supplied
 * field is stored in a new document under the field's own name, in
 * argument order.
 */
SolrInputDocument doc(SolrInputField... fields) {
  final SolrInputDocument document = new SolrInputDocument();
  for (int i = 0; i < fields.length; i++) {
    final SolrInputField current = fields[i];
    document.put(current.getName(), current);
  }
  return document;
}
/**
 * Convenience method for assembling a SolrInputField: creates a field
 * with the given name, adds each value with a boost of 1.0, and then
 * sets the boost of the field to the requested one.
 */
SolrInputField field(String name, float boost, Object... values) {
  final SolrInputField result = new SolrInputField(name);
  for (int i = 0; i < values.length; i++) {
    result.addValue(values[i], 1.0F);
  }
  // the requested boost is applied only after all values have been added
  result.setBoost(boost);
  return result;
}
/**
 * Convenience shorthand for {@link #field} that uses a boost of 1.0.
 */
SolrInputField f(String name, Object... values) {
  final float defaultBoost = 1.0F;
  return field(name, defaultBoost, values);
}
/**
 * Pushes a document through the named update processor chain of the test
 * core and hands back the document held by the add command once the chain
 * has run. (NOTE: some chains may modify the supplied document in place.)
 *
 * @param chain name of the chain to look up on the core; the lookup
 *        result is asserted to be non-null
 * @param docIn the document to run through the chain
 * @return the document referenced by the add command after processing
 */
SolrInputDocument processAdd(final String chain,
                             final SolrInputDocument docIn)
  throws IOException {
  final SolrCore solrCore = h.getCore();
  final UpdateRequestProcessorChain processorChain =
    solrCore.getUpdateProcessingChain(chain);
  assertNotNull("No Chain named: " + chain, processorChain);

  final SolrQueryResponse response = new SolrQueryResponse();
  final SolrQueryRequest request =
    new LocalSolrQueryRequest(solrCore, new ModifiableSolrParams());
  try {
    final AddUpdateCommand addCommand = new AddUpdateCommand(request);
    addCommand.solrDoc = docIn;
    processorChain.createProcessor(request, response).processAdd(addCommand);
    return addCommand.solrDoc;
  } finally {
    // the request is closed on every exit path, including exceptions
    request.close();
  }
}
}