mirror of https://github.com/apache/lucene.git
SOLR-2802: several new UpdateProcessorFactories for modifying fields of documents, along with base classes to make writing these types of classes easier for users
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1242514 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e825015535
commit
4906b8a614
|
@ -207,6 +207,21 @@ New Features
|
|||
* SOLR-1726: Added deep paging support to search (sort by score only) which should use less memory when paging deeply into results
|
||||
by keeping the priority queue small. (Manojkumar Rangasamy Kannadasan, gsingers)
|
||||
|
||||
* SOLR-2802: New FieldMutatingUpdateProcessor and Factory to simplify the
|
||||
development of UpdateProcessors that modify field values of documents as
|
||||
they are indexed. Also includes several useful new implementations:
|
||||
RemoveBlankFieldUpdateProcessorFactory
|
||||
TrimFieldUpdateProcessorFactory
|
||||
HTMLStripFieldUpdateProcessorFactory
|
||||
RegexReplaceProcessorFactory
|
||||
FieldLengthUpdateProcessorFactory
|
||||
ConcatFieldUpdateProcessorFactory
|
||||
FirstFieldValueUpdateProcessorFactory
|
||||
LastFieldValueUpdateProcessorFactory
|
||||
MinFieldValueUpdateProcessorFactory
|
||||
MaxFieldValueUpdateProcessorFactory
|
||||
(hossman, janhoy)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.TextField;
|
||||
import org.apache.solr.schema.StrField;
|
||||
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
/**
|
||||
* Concatenates multiple values for fields matching the specified
|
||||
* conditions using a configurable <code>delimiter</code> which defaults
|
||||
* to "<code>, </code>".
|
||||
* <p>
|
||||
* By default, this processor concatenates the values for any field name
|
||||
* which according to the schema is <code>multiValued="false"</code>
|
||||
* and uses <code>TextField</code> or <code>StrField</code>
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* For example, in the configuration below, any "single valued" string and
|
||||
* text field which is found to contain multiple values <i>except</i> for
|
||||
* the <code>primary_author</code> field will be concatenated using the
|
||||
* string "<code>; </code>" as a delimiter. For the
|
||||
* <code>primary_author</code> field, the multiple values will be left
|
||||
* alone for <code>FirstFieldValueUpdateProcessorFactory</code> to deal with.
|
||||
* </p>
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* <updateRequestProcessorChain>
|
||||
* <processor class="solr.ConcatFieldUpdateProcessorFactory">
|
||||
* <str name="delimiter">; </str>
|
||||
* <lst name="exclude">
|
||||
* <str name="fieldName">primary_author</str>
|
||||
* </lst>
|
||||
* </processor>
|
||||
* <processor class="solr.FirstFieldValueUpdateProcessorFactory">
|
||||
* <str name="fieldName">primary_author</str>
|
||||
* </processor>
|
||||
* </updateRequestProcessorChain>
|
||||
* </pre>
|
||||
*/
|
||||
public final class ConcatFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {
|
||||
|
||||
String delimiter = ", ";
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void init(NamedList args) {
|
||||
Object d = args.remove("delimiter");
|
||||
if (null != d) delimiter = d.toString();
|
||||
|
||||
super.init(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
|
||||
SolrQueryResponse rsp,
|
||||
UpdateRequestProcessor next) {
|
||||
return new FieldMutatingUpdateProcessor(getSelector(), next) {
|
||||
protected SolrInputField mutate(final SolrInputField src) {
|
||||
if (src.getValueCount() <= 1) return src;
|
||||
|
||||
SolrInputField result = new SolrInputField(src.getName());
|
||||
result.setValue(StringUtils.join(src.getValues(), delimiter),
|
||||
src.getBoost());
|
||||
return result;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldMutatingUpdateProcessor.FieldNameSelector
|
||||
getDefaultSelector(final SolrCore core) {
|
||||
|
||||
final IndexSchema schema = core.getSchema();
|
||||
return new FieldMutatingUpdateProcessor.FieldNameSelector() {
|
||||
public boolean shouldMutate(final String fieldName) {
|
||||
|
||||
// first check type since it should be fastest
|
||||
FieldType type = schema.getFieldTypeNoEx(fieldName);
|
||||
if (null == type) return false;
|
||||
|
||||
if (! (TextField.class.isInstance(type)
|
||||
|| StrField.class.isInstance(type))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// only ask for SchemaField if we passed the type check.
|
||||
SchemaField sf = schema.getFieldOrNull(fieldName);
|
||||
// shouldn't be null since since type wasn't, but just in case
|
||||
if (null == sf) return false;
|
||||
|
||||
return ! sf.multiValued();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
||||
|
||||
/**
|
||||
* Replaces any CharSequence values found in fields matching the specified
|
||||
* conditions with the lengths of those CharSequences (as an Integer).
|
||||
* <p>
|
||||
* By default, this processor matches no fields.
|
||||
* </p>
|
||||
* <p>For example, with the configuration listed below any documents
|
||||
* containing String values (such as "<code>abcdef</code>" or
|
||||
* "<code>xyz</code>") in a field declared in the schema using
|
||||
* <code>TrieIntField</code> or <code>TrieLongField</code>
|
||||
* would have those Strings replaced with the length of those fields as an
|
||||
* Integer
|
||||
* (ie: <code>6</code> and <code>3</code> respectively)
|
||||
* </p>
|
||||
* <pre class="prettyprint">
|
||||
* <processor class="solr.FieldLengthUpdateProcessorFactory">
|
||||
* <arr name="typeClass">
|
||||
* <str>solr.TrieIntField</str>
|
||||
* <str>solr.TrieLongField</str>
|
||||
* </arr>
|
||||
* </processor>
|
||||
* </pre>
|
||||
*/
|
||||
public final class FieldLengthUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void init(NamedList args) {
|
||||
// no length specific init args
|
||||
super.init(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldMutatingUpdateProcessor.FieldNameSelector
|
||||
getDefaultSelector(final SolrCore core) {
|
||||
|
||||
return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
|
||||
SolrQueryResponse rsp,
|
||||
UpdateRequestProcessor next) {
|
||||
return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
|
||||
protected Object mutateValue(final Object src) {
|
||||
if (src instanceof CharSequence) {
|
||||
return new Integer(((CharSequence)src).length());
|
||||
}
|
||||
return src;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,283 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
import static org.apache.solr.common.SolrException.ErrorCode.*;
|
||||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.update.AddUpdateCommand;
|
||||
|
||||
/**
|
||||
* Reusable base class for UpdateProcessors that will consider
|
||||
* AddUpdateCommands and mutate the values associated with configured
|
||||
* fields.
|
||||
* <p>
|
||||
* Subclasses should override the mutate method to specify how individual
|
||||
* SolrInputFields identified by the selector associated with this instance
|
||||
* will be mutated.
|
||||
* </p>
|
||||
*
|
||||
* @see FieldMutatingUpdateProcessorFactory
|
||||
* @see FieldValueMutatingUpdateProcessor
|
||||
* @see FieldNameSelector
|
||||
*/
|
||||
public abstract class FieldMutatingUpdateProcessor
|
||||
extends UpdateRequestProcessor {
|
||||
|
||||
private final FieldNameSelector selector;
|
||||
public FieldMutatingUpdateProcessor(FieldNameSelector selector,
|
||||
UpdateRequestProcessor next) {
|
||||
super(next);
|
||||
this.selector = selector;
|
||||
}
|
||||
|
||||
/**
|
||||
* Method for mutating SolrInputFields associated with fields identified
|
||||
* by the FieldNameSelector associated with this processor
|
||||
* @param src the SolrInputField to mutate, may be modified in place and
|
||||
* returned
|
||||
* @return the SolrInputField to use in replacing the original (src) value.
|
||||
* If null the field will be removed.
|
||||
*/
|
||||
protected abstract SolrInputField mutate(final SolrInputField src);
|
||||
|
||||
@Override
|
||||
public void processAdd(AddUpdateCommand cmd) throws IOException {
|
||||
final SolrInputDocument doc = cmd.getSolrInputDocument();
|
||||
|
||||
// make a copy we can iterate over while mutating the doc
|
||||
final Collection<String> fieldNames
|
||||
= new ArrayList<String>(doc.getFieldNames());
|
||||
|
||||
for (final String fname : fieldNames) {
|
||||
|
||||
if (! selector.shouldMutate(fname)) continue;
|
||||
|
||||
final SolrInputField src = doc.get(fname);
|
||||
final SolrInputField dest = mutate(src);
|
||||
if (null == dest) {
|
||||
doc.remove(fname);
|
||||
} else {
|
||||
// semantics of what happens if dest has diff name are hard
|
||||
// we could treat it as a copy, or a rename
|
||||
// for now, don't allow it.
|
||||
if (! fname.equals(dest.getName()) ) {
|
||||
throw new SolrException(SERVER_ERROR,
|
||||
"mutute returned field with different name: "
|
||||
+ fname + " => " + dest.getName());
|
||||
}
|
||||
doc.put(dest.getName(), dest);
|
||||
}
|
||||
}
|
||||
super.processAdd(cmd);
|
||||
}
|
||||
|
||||
/**
|
||||
* Interface for idenfifying which fileds should be mutated
|
||||
*/
|
||||
public static interface FieldNameSelector {
|
||||
public boolean shouldMutate(final String fieldName);
|
||||
}
|
||||
|
||||
/** Singleton indicating all fields should be mutated */
|
||||
public static final FieldNameSelector SELECT_ALL_FIELDS
|
||||
= new FieldNameSelector() {
|
||||
public boolean shouldMutate(final String fieldName) {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/** Singleton indicating no fields should be mutated */
|
||||
public static final FieldNameSelector SELECT_NO_FIELDS
|
||||
= new FieldNameSelector() {
|
||||
public boolean shouldMutate(final String fieldName) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Wraps two FieldNameSelectors such that the FieldNameSelector
|
||||
* returned matches all fields specified by the "includes" unless they
|
||||
* are matched by "excludes"
|
||||
* @param includes a selector identifying field names that should be selected
|
||||
* @param excludes a selector identifying field names that should be
|
||||
* <i>not</i> be selected, even if they are matched by the 'includes'
|
||||
* selector
|
||||
* @return Either a new FieldNameSelector or one of the input selecors
|
||||
* if the combination lends itself to optimization.
|
||||
*/
|
||||
public static FieldNameSelector wrap(final FieldNameSelector includes,
|
||||
final FieldNameSelector excludes) {
|
||||
|
||||
if (SELECT_NO_FIELDS == excludes) {
|
||||
return includes;
|
||||
}
|
||||
|
||||
if (SELECT_ALL_FIELDS == excludes) {
|
||||
return SELECT_NO_FIELDS;
|
||||
}
|
||||
|
||||
if (SELECT_ALL_FIELDS == includes) {
|
||||
return new FieldNameSelector() {
|
||||
public boolean shouldMutate(final String fieldName) {
|
||||
return ! excludes.shouldMutate(fieldName);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
return new FieldNameSelector() {
|
||||
public boolean shouldMutate(final String fieldName) {
|
||||
return (includes.shouldMutate(fieldName)
|
||||
&& ! excludes.shouldMutate(fieldName));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method that can be used to define a FieldNameSelector
|
||||
* using the same types of rules as the FieldMutatingUpdateProcessor init
|
||||
* code. This may be useful for Factories that wish to define default
|
||||
* selectors in similar terms to what the configuration would look like.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static FieldNameSelector createFieldNameSelector
|
||||
(final SolrResourceLoader loader,
|
||||
final IndexSchema schema,
|
||||
final Set<String> fields,
|
||||
final Set<String> typeNames,
|
||||
final Collection<String> typeClasses,
|
||||
final Collection<Pattern> regexes,
|
||||
final FieldNameSelector defSelector) {
|
||||
|
||||
final Collection<Class> classes
|
||||
= new ArrayList<Class>(typeClasses.size());
|
||||
|
||||
for (String t : typeClasses) {
|
||||
try {
|
||||
classes.add(loader.findClass(t));
|
||||
} catch (Exception e) {
|
||||
throw new SolrException(SERVER_ERROR,
|
||||
"Can't resolve typeClass: " + t, e);
|
||||
}
|
||||
}
|
||||
|
||||
if (classes.isEmpty() &&
|
||||
typeNames.isEmpty() &&
|
||||
regexes.isEmpty() &&
|
||||
fields.isEmpty()) {
|
||||
return defSelector;
|
||||
}
|
||||
|
||||
return new ConfigurableFieldNameSelector
|
||||
(schema, fields, typeNames, classes, regexes);
|
||||
}
|
||||
|
||||
private static final class ConfigurableFieldNameSelector
|
||||
implements FieldNameSelector {
|
||||
|
||||
final IndexSchema schema;
|
||||
final Set<String> fields;
|
||||
final Set<String> typeNames;
|
||||
final Collection<Class> classes;
|
||||
final Collection<Pattern> regexes;
|
||||
|
||||
private ConfigurableFieldNameSelector(final IndexSchema schema,
|
||||
final Set<String> fields,
|
||||
final Set<String> typeNames,
|
||||
final Collection<Class> classes,
|
||||
final Collection<Pattern> regexes) {
|
||||
this.schema = schema;
|
||||
this.fields = fields;
|
||||
this.typeNames = typeNames;
|
||||
this.classes = classes;
|
||||
this.regexes = regexes;
|
||||
}
|
||||
|
||||
public boolean shouldMutate(final String fieldName) {
|
||||
|
||||
// order of checks is bsaed on what should be quicker
|
||||
// (ie: set lookups faster the looping over instanceOf / matches tests
|
||||
|
||||
if ( ! (fields.isEmpty() || fields.contains(fieldName)) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// do not consider it an error if the fieldName has no type
|
||||
// there might be another processor dealing with it later
|
||||
FieldType t = schema.getFieldTypeNoEx(fieldName);
|
||||
if (null != t) {
|
||||
if (! (typeNames.isEmpty() || typeNames.contains(t.getTypeName())) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (! (classes.isEmpty() || instanceOfAny(t, classes)) ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (! (regexes.isEmpty() || matchesAny(fieldName, regexes)) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if the Object 'o' is an instance of any class in
|
||||
* the Collection
|
||||
*/
|
||||
private static boolean instanceOfAny(Object o, Collection<Class> classes) {
|
||||
for (Class c : classes) {
|
||||
if ( c.isInstance(o) ) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if the CharSequence 's' matches any Pattern in the
|
||||
* Collection
|
||||
*/
|
||||
private static boolean matchesAny(CharSequence s,
|
||||
Collection<Pattern> regexes) {
|
||||
for (Pattern p : regexes) {
|
||||
if (p.matcher(s).matches()) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,284 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import static org.apache.solr.common.SolrException.ErrorCode.*;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.update.AddUpdateCommand;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
|
||||
|
||||
/**
|
||||
* Base class for implementing Factories for FieldMutatingUpdateProcessors and
|
||||
* FieldValueMutatingUpdateProcessors.
|
||||
*
|
||||
* <p>
|
||||
* This class provides all of the plumbing for configuring the
|
||||
* FieldNameSelector using the following init params to specify selection
|
||||
* criteria...
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li><code>fieldName</code> - selecting specific fields by field name lookup</li>
|
||||
* <li><code>fieldRegex</code> - selecting specific fields by field name regex match (regexes are checked in the order specified)</li>
|
||||
* <li><code>typeName</code> - selecting specific fields by fieldType name lookup</li>
|
||||
* <li><code>typeClass</code> - selecting specific fields by fieldType class lookup, including inheritence and interfaces</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>
|
||||
* Each criterion can be specified as either an <arr> of <str>, or
|
||||
* multiple <str> with the same name. When multiple criteria of a
|
||||
* single type exist, fields must match <b>at least one</b> to be selected.
|
||||
* If more than one type of criteria exists, fields must match
|
||||
* <b>at least one of each</b> to be selected.
|
||||
* </p>
|
||||
* <p>
|
||||
* One or more <code>exclude</code> <lst> params may also be specified,
|
||||
* containing any of the above criteria, identifying fields to be excluded
|
||||
* from selection even if they match the selection criteria. As with the main
|
||||
* selection criteria, a field must match all of the criteria in a single exclusion
|
||||
* in order to be excluded, but multiple exclusions may be specified to get an
|
||||
* <code>OR</code> behavior
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* In the ExampleFieldMutatingUpdateProcessorFactory configured below,
|
||||
* fields will be mutated if the name starts with "foo" <i>or</i> "bar";
|
||||
* <b>unless</b> the field name contains the substring "SKIP" <i>or</i>
|
||||
* the fieldType is (or subclasses) DateField. Meaning a field named
|
||||
* "foo_SKIP" is guaranteed not to be selected, but a field named "bar_smith"
|
||||
* that uses StrField will be selected.
|
||||
* </p>
|
||||
* <pre class="prettyprint">
|
||||
* <processor class="solr.ExampleFieldMutatingUpdateProcessorFactory">
|
||||
* <str name="fieldRegex">foo.*</str>
|
||||
* <str name="fieldRegex">bar.*</str>
|
||||
* <!-- each set of exclusions is checked independently -->
|
||||
* <lst name="exclude">
|
||||
* <str name="fieldRegex">.*SKIP.*</str>
|
||||
* </lst>
|
||||
* <lst name="exclude">
|
||||
* <str name="typeClass">solr.DateField</str>
|
||||
* </lst>
|
||||
* </processor>
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* Subclasses define the default selection behavior to be applied if no
|
||||
* criteria is configured by the user. User configured "exclude" criteria
|
||||
* will be applied to the subclass defined default selector.
|
||||
* </p>
|
||||
*
|
||||
* @see FieldMutatingUpdateProcessor
|
||||
* @see FieldValueMutatingUpdateProcessor
|
||||
* @see FieldMutatingUpdateProcessor.FieldNameSelector
|
||||
*/
|
||||
public abstract class FieldMutatingUpdateProcessorFactory
|
||||
extends UpdateRequestProcessorFactory
|
||||
implements SolrCoreAware {
|
||||
|
||||
private static class SelectorParams {
|
||||
public Set<String> fieldName = Collections.emptySet();
|
||||
public Set<String> typeName = Collections.emptySet();
|
||||
public Collection<String> typeClass = Collections.emptyList();
|
||||
public Collection<Pattern> fieldRegex = Collections.emptyList();
|
||||
}
|
||||
|
||||
private SelectorParams inclusions = new SelectorParams();
|
||||
private Collection<SelectorParams> exclusions
|
||||
= new ArrayList<SelectorParams>();
|
||||
|
||||
private FieldMutatingUpdateProcessor.FieldNameSelector selector = null;
|
||||
|
||||
protected final FieldMutatingUpdateProcessor.FieldNameSelector getSelector() {
|
||||
if (null != selector) return selector;
|
||||
|
||||
throw new SolrException(SERVER_ERROR, "selector was never initialized, "+
|
||||
" inform(SolrCore) never called???");
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private static final SelectorParams parseSelectorParams(NamedList args) {
|
||||
SelectorParams params = new SelectorParams();
|
||||
|
||||
params.fieldName = new HashSet<String>(oneOrMany(args, "fieldName"));
|
||||
params.typeName = new HashSet<String>(oneOrMany(args, "typeName"));
|
||||
|
||||
// we can compile the patterns now
|
||||
Collection<String> patterns = oneOrMany(args, "fieldRegex");
|
||||
if (! patterns.isEmpty()) {
|
||||
params.fieldRegex = new ArrayList<Pattern>(patterns.size());
|
||||
for (String s : patterns) {
|
||||
try {
|
||||
params.fieldRegex.add(Pattern.compile(s));
|
||||
} catch (PatternSyntaxException e) {
|
||||
throw new SolrException
|
||||
(SERVER_ERROR, "Invalid 'fieldRegex' pattern: " + s, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// resolve this into actual Class objects later
|
||||
params.typeClass = oneOrMany(args, "typeClass");
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Handles common initialization related to source fields for
|
||||
* constructoring the FieldNameSelector to be used.
|
||||
*
|
||||
* Will error if any unexpected init args are found, so subclasses should
|
||||
* remove any subclass-specific init args before calling this method.
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void init(NamedList args) {
|
||||
|
||||
inclusions = parseSelectorParams(args);
|
||||
|
||||
List<Object> excList = args.getAll("exclude");
|
||||
for (Object excObj : excList) {
|
||||
if (null == excObj) {
|
||||
throw new SolrException
|
||||
(SERVER_ERROR, "'exclude' init param can not be null");
|
||||
}
|
||||
if (! (excObj instanceof NamedList) ) {
|
||||
throw new SolrException
|
||||
(SERVER_ERROR, "'exclude' init param must be <lst/>");
|
||||
}
|
||||
NamedList exc = (NamedList) excObj;
|
||||
exclusions.add(parseSelectorParams(exc));
|
||||
if (0 < exc.size()) {
|
||||
throw new SolrException(SERVER_ERROR,
|
||||
"Unexpected 'exclude' init sub-param(s): '" +
|
||||
args.getName(0) + "'");
|
||||
}
|
||||
// call once per instance
|
||||
args.remove("exclude");
|
||||
}
|
||||
if (0 < args.size()) {
|
||||
throw new SolrException(SERVER_ERROR,
|
||||
"Unexpected init param(s): '" +
|
||||
args.getName(0) + "'");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void inform(final SolrCore core) {
|
||||
|
||||
final IndexSchema schema = core.getSchema();
|
||||
|
||||
selector =
|
||||
FieldMutatingUpdateProcessor.createFieldNameSelector
|
||||
(core.getResourceLoader(),
|
||||
core.getSchema(),
|
||||
inclusions.fieldName,
|
||||
inclusions.typeName,
|
||||
inclusions.typeClass,
|
||||
inclusions.fieldRegex,
|
||||
getDefaultSelector(core));
|
||||
|
||||
for (SelectorParams exc : exclusions) {
|
||||
selector = FieldMutatingUpdateProcessor.wrap
|
||||
(selector,
|
||||
FieldMutatingUpdateProcessor.createFieldNameSelector
|
||||
(core.getResourceLoader(),
|
||||
core.getSchema(),
|
||||
exc.fieldName,
|
||||
exc.typeName,
|
||||
exc.typeClass,
|
||||
exc.fieldRegex,
|
||||
FieldMutatingUpdateProcessor.SELECT_NO_FIELDS));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Defines the default selection behavior when the user has not
|
||||
* configured any specific criteria for selecting fields. The Default
|
||||
* implementation matches all fields, and should be overridden by subclasses
|
||||
* as needed.
|
||||
*
|
||||
* @see FieldMutatingUpdateProcessor#SELECT_ALL_FIELDS
|
||||
*/
|
||||
protected FieldMutatingUpdateProcessor.FieldNameSelector
|
||||
getDefaultSelector(final SolrCore core) {
|
||||
|
||||
return FieldMutatingUpdateProcessor.SELECT_ALL_FIELDS;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all instance of the key from NamedList, returning the Set of
|
||||
* Strings that key refered to. Throws an error if the key didn't refer
|
||||
* to one or more strings (or arrays of strings)
|
||||
* @exception SolrException invalid arr/str structure.
|
||||
*/
|
||||
private static Collection<String> oneOrMany(final NamedList args, final String key) {
|
||||
List<String> result = new ArrayList<String>(args.size() / 2);
|
||||
final String err = "init arg '" + key + "' must be a string "
|
||||
+ "(ie: 'str'), or an array (ie: 'arr') containing strings; found: ";
|
||||
|
||||
for (Object o = args.remove(key); null != o; o = args.remove(key)) {
|
||||
if (o instanceof String) {
|
||||
result.add((String)o);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (o instanceof Object[]) {
|
||||
o = Arrays.asList((Object[]) o);
|
||||
}
|
||||
|
||||
if (o instanceof Collection) {
|
||||
for (Object item : (Collection)o) {
|
||||
if (! (item instanceof String)) {
|
||||
throw new SolrException(SERVER_ERROR, err + item.getClass());
|
||||
}
|
||||
result.add((String)item);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// who knows what the hell we have
|
||||
throw new SolrException(SERVER_ERROR, err + o.getClass());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Abstract subclass of FieldMutatingUpdateProcessor for implementing
|
||||
* UpdateProcessors that will mutate all individual values of a selected
|
||||
* field independently
|
||||
*
|
||||
* @see FieldMutatingUpdateProcessorFactory
|
||||
*/
|
||||
public abstract class FieldValueMutatingUpdateProcessor
|
||||
extends FieldMutatingUpdateProcessor {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(FieldValueMutatingUpdateProcessor.class);
|
||||
|
||||
|
||||
public static final Object DELETE_VALUE_SINGLETON = new Object() {
|
||||
public String toString() {
|
||||
return "!!Singleton Object Triggering Value Deletion!!";
|
||||
}
|
||||
};
|
||||
|
||||
public FieldValueMutatingUpdateProcessor(FieldNameSelector selector,
|
||||
UpdateRequestProcessor next) {
|
||||
super(selector, next);
|
||||
}
|
||||
|
||||
/**
|
||||
* Mutates individual values of a field as needed, or returns the original
|
||||
* value.
|
||||
*
|
||||
* @param src a value from a matched field which should be mutated
|
||||
* @return the value to use as a replacement for src, or
|
||||
* <code>DELETE_VALUE_SINGLETON</code> to indicate that the value
|
||||
* should be removed completely.
|
||||
* @see #DELETE_VALUE_SINGLETON
|
||||
*/
|
||||
protected abstract Object mutateValue(final Object src);
|
||||
|
||||
protected final SolrInputField mutate(final SolrInputField src) {
|
||||
SolrInputField result = new SolrInputField(src.getName());
|
||||
for (final Object srcVal : src.getValues()) {
|
||||
final Object destVal = mutateValue(srcVal);
|
||||
if (DELETE_VALUE_SINGLETON == destVal) {
|
||||
/* NOOP */
|
||||
log.debug("removing value from field '{}': {}",
|
||||
src.getName(), srcVal);
|
||||
} else {
|
||||
if (destVal != srcVal) {
|
||||
log.debug("replace value from field '{}': {} with {}",
|
||||
new Object[] { src.getName(), srcVal, destVal });
|
||||
}
|
||||
result.addValue(destVal, 1.0F);
|
||||
}
|
||||
}
|
||||
result.setBoost(src.getBoost());
|
||||
return 0 == result.getValueCount() ? null : result;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Base class for processors that want to mutate selected fields to only
|
||||
* keep a subset of the original values.
|
||||
* @see #pickSubset
|
||||
*/
|
||||
public abstract class FieldValueSubsetUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {
|
||||
|
||||
@Override
|
||||
public final UpdateRequestProcessor getInstance(SolrQueryRequest req,
|
||||
SolrQueryResponse rsp,
|
||||
UpdateRequestProcessor next) {
|
||||
return new FieldMutatingUpdateProcessor(getSelector(), next) {
|
||||
protected SolrInputField mutate(final SolrInputField src) {
|
||||
if (src.getValueCount() <= 1) return src;
|
||||
|
||||
SolrInputField result = new SolrInputField(src.getName());
|
||||
result.setValue(pickSubset(src.getValues()),
|
||||
src.getBoost());
|
||||
return result;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Method subclasses must override to specify which values should be kept.
|
||||
* This method will not be called unless the collection contains more then
|
||||
* one value.
|
||||
*/
|
||||
protected abstract Collection<Object> pickSubset(Collection<Object> values);
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Keeps only the first value of fields matching the specified
|
||||
* conditions. Correct behavior assumes that the SolrInputFields being mutated
|
||||
* are either single valued, or use an ordered Collection (ie: not a Set).
|
||||
* <p>
|
||||
* By default, this processor matches no fields.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* For example, in the configuration below, if a field named
|
||||
* <code>primary_author</code> contained multiple values (ie:
|
||||
* <code>"Adam Doe", "Bob Smith", "Carla Jones"</code>) then only the first
|
||||
* value (ie: <code>"Adam Doe"</code>) will be kept
|
||||
* </p>
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* <processor class="solr.FirstFieldValueUpdateProcessorFactory">
|
||||
* <str name="fieldName">primary_author</str>
|
||||
* </processor>
|
||||
* </pre>
|
||||
*
|
||||
* @see LastFieldValueUpdateProcessorFactory
|
||||
*/
|
||||
public final class FirstFieldValueUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {
|
||||
|
||||
@Override
|
||||
public Collection<Object> pickSubset(Collection<Object> values) {
|
||||
// trust the iterator
|
||||
return Collections.singletonList(values.iterator().next());
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldMutatingUpdateProcessor.FieldNameSelector
|
||||
getDefaultSelector(final SolrCore core) {
|
||||
|
||||
return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
|
||||
import org.apache.lucene.analysis.CharReader;
|
||||
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.io.StringWriter;
|
||||
|
||||
/**
|
||||
* Strips all HTML Markup in any CharSequence values
|
||||
* found in fields matching the specified conditions.
|
||||
* <p>
|
||||
* By default this processor matches no fields
|
||||
* </p>
|
||||
*
|
||||
* <p>For example, with the configuration listed below any documents
|
||||
* containing HTML markup in any field declared in the schema using
|
||||
* <code>StrField</code> will have that HTML stripped away.
|
||||
* </p>
|
||||
* <pre class="prettyprint">
|
||||
* <processor class="solr.HTMLStripFieldUpdateProcessorFactory">
|
||||
* <str name="typeClass">solr.StrField</str>
|
||||
* </processor>
|
||||
* </pre>
|
||||
*/
|
||||
public final class HTMLStripFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {
|
||||
|
||||
@Override
|
||||
public FieldMutatingUpdateProcessor.FieldNameSelector
|
||||
getDefaultSelector(final SolrCore core) {
|
||||
|
||||
return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
|
||||
SolrQueryResponse rsp,
|
||||
UpdateRequestProcessor next) {
|
||||
return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
|
||||
protected Object mutateValue(final Object src) {
|
||||
if (src instanceof CharSequence) {
|
||||
CharSequence s = (CharSequence)src;
|
||||
StringWriter result = new StringWriter(s.length());
|
||||
Reader in = null;
|
||||
try {
|
||||
in = new HTMLStripCharFilter
|
||||
(CharReader.get(new StringReader(s.toString())));
|
||||
IOUtils.copy(in, result);
|
||||
return result.toString();
|
||||
} catch (IOException e) {
|
||||
// we tried and failed
|
||||
return s;
|
||||
} finally {
|
||||
IOUtils.closeQuietly(in);
|
||||
}
|
||||
|
||||
}
|
||||
return src;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.SortedSet;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Keeps only the last value of fields matching the specified
|
||||
* conditions. Correct behavior assumes that the SolrInputFields being mutated
|
||||
* are either single valued, or use an ordered Collection (ie: not a Set).
|
||||
* <p>
|
||||
* By default, this processor matches no fields.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* For example, in the configuration below, if a field named
|
||||
* <code>primary_author</code> contained multiple values (ie:
|
||||
* <code>"Adam Doe", "Bob Smith", "Carla Jones"</code>) then only the last
|
||||
* value (ie: <code>"Carla Jones"</code>) will be kept
|
||||
* </p>
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* <processor class="solr.LastFieldValueUpdateProcessorFactory">
|
||||
* <str name="fieldName">primary_author</str>
|
||||
* </processor>
|
||||
* </pre>
|
||||
*
|
||||
* @see FirstFieldValueUpdateProcessorFactory
|
||||
*/
|
||||
public final class LastFieldValueUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {
|
||||
|
||||
@Override
|
||||
public Collection<Object> pickSubset(Collection<Object> values) {
|
||||
|
||||
Object result = null;
|
||||
|
||||
if (values instanceof List) {
|
||||
// optimize index lookup
|
||||
List l = (List)values;
|
||||
result = l.get(l.size()-1);
|
||||
} else if (values instanceof SortedSet) {
|
||||
// optimize tail lookup
|
||||
result = ((SortedSet)values).last();
|
||||
} else {
|
||||
// trust the iterator
|
||||
for (Object o : values) { result = o; }
|
||||
}
|
||||
|
||||
return Collections.singletonList(result);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldMutatingUpdateProcessor.FieldNameSelector
|
||||
getDefaultSelector(final SolrCore core) {
|
||||
|
||||
return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* An update processor that keeps only the maximum value from any selected
|
||||
* fields where multiple values are found. Correct behavior assumes that all
|
||||
* of the values in the SolrInputFields being mutated are mutually comparable;
|
||||
* If this is not the case, then the full list of all values found will be
|
||||
* used as is.
|
||||
* <p>
|
||||
* By default, this processor matches no fields.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* In the example configuration below, if a document contains multiple integer
|
||||
* values (ie: <code>64, 128, 1024</code>) in the field
|
||||
* <code>largestFileSize</code> then only the biggest value
|
||||
* (ie: <code>1024</code>) will be kept in that field.
|
||||
* </p>
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* <processor class="solr.MaxFieldValueUpdateProcessorFactory">
|
||||
* <str name="fieldName">largestFileSize</str>
|
||||
* </processor>
|
||||
* </pre>
|
||||
*
|
||||
* @see MinFieldValueUpdateProcessorFactory
|
||||
* @see Collections#max
|
||||
*/
|
||||
public final class MaxFieldValueUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public Collection<Object> pickSubset(Collection<Object> values) {
|
||||
Collection<Object> result = values;
|
||||
try {
|
||||
result = Collections.singletonList
|
||||
(Collections.max((Collection)values));
|
||||
} catch (ClassCastException e) {
|
||||
/* NOOP */
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldMutatingUpdateProcessor.FieldNameSelector
|
||||
getDefaultSelector(final SolrCore core) {
|
||||
|
||||
return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* An update processor that keeps only the minimum value from any selected
|
||||
* fields where multiple values are found. Correct behavior assumes that all
|
||||
* of the values in the SolrInputFields being mutated are mutually comparable;
|
||||
* If this is not the case, then the full list of all values found will be
|
||||
* used as is.
|
||||
* <p>
|
||||
* By default, this processor matches no fields.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* In the example configuration below, if a document contains multiple integer
|
||||
* values (ie: <code>64, 128, 1024</code>) in the field
|
||||
* <code>smallestFileSize</code> then only the smallest value
|
||||
* (ie: <code>64</code>) will be kept in that field.
|
||||
* </p>
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* <processor class="solr.MinFieldValueUpdateProcessorFactory">
|
||||
* <str name="fieldName">smallestFileSize</str>
|
||||
* </processor>
|
||||
* </pre>
|
||||
*
|
||||
* @see MaxFieldValueUpdateProcessorFactory
|
||||
* @see Collections#min
|
||||
*/
|
||||
public final class MinFieldValueUpdateProcessorFactory extends FieldValueSubsetUpdateProcessorFactory {
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public Collection<Object> pickSubset(Collection<Object> values) {
|
||||
Collection<Object> result = values;
|
||||
try {
|
||||
result = Collections.singletonList
|
||||
(Collections.min((Collection)values));
|
||||
} catch (ClassCastException e) {
|
||||
/* NOOP */
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldMutatingUpdateProcessor.FieldNameSelector
|
||||
getDefaultSelector(final SolrCore core) {
|
||||
|
||||
return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,121 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* An update processor that applies a configured regex to any
|
||||
* CharSequence values found in the selected fields, and replaces
|
||||
* any matches with the configured replacement string
|
||||
* <p>
|
||||
* By default this processor applies itself to no fields.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* For example, with the configuration listed below, any sequence of multiple
|
||||
* whitespace characters found in values for field named <code>title</code>
|
||||
* or <code>content</code> will be replaced by a single space character.
|
||||
* </p>
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* <processor class="solr.RegexReplaceProcessorFactory">
|
||||
* <str name="fieldName">content</str>
|
||||
* <str name="fieldName">title</str>
|
||||
* <str name="pattern">\s+</str>
|
||||
* <str name="replacement"> </str>
|
||||
* </processor>
|
||||
* </pre>
|
||||
*
|
||||
* @see java.util.regex.Pattern
|
||||
*/
|
||||
public final class RegexReplaceProcessorFactory extends FieldMutatingUpdateProcessorFactory {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(RegexReplaceProcessorFactory.class);
|
||||
|
||||
private static final String REPLACEMENT_PARAM = "replacement";
|
||||
private static final String PATTERN_PARAM = "pattern";
|
||||
|
||||
private Pattern pattern;
|
||||
private String replacement;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void init(NamedList args) {
|
||||
|
||||
String patternParam = args.remove(PATTERN_PARAM).toString();
|
||||
|
||||
if(patternParam == null) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"Missing required init parameter: " + PATTERN_PARAM);
|
||||
}
|
||||
try {
|
||||
pattern = Pattern.compile(patternParam);
|
||||
} catch (PatternSyntaxException e) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"Invalid regex: " + patternParam, e);
|
||||
}
|
||||
|
||||
String replacementParam = args.remove(REPLACEMENT_PARAM).toString();
|
||||
if(replacementParam == null) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"Missing required init parameter: " + REPLACEMENT_PARAM);
|
||||
}
|
||||
replacement = Matcher.quoteReplacement(replacementParam);
|
||||
|
||||
super.init(args);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see FieldMutatingUpdateProcessor#SELECT_NO_FIELDS
|
||||
*/
|
||||
protected FieldMutatingUpdateProcessor.FieldNameSelector
|
||||
getDefaultSelector(final SolrCore core) {
|
||||
|
||||
return FieldMutatingUpdateProcessor.SELECT_NO_FIELDS;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateRequestProcessor getInstance(SolrQueryRequest request,
|
||||
SolrQueryResponse response,
|
||||
UpdateRequestProcessor next) {
|
||||
return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
|
||||
protected Object mutateValue(final Object src) {
|
||||
if (src instanceof CharSequence) {
|
||||
CharSequence txt = (CharSequence)src;
|
||||
return pattern.matcher(txt).replaceAll(replacement);
|
||||
}
|
||||
return src;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
|
||||
/**
|
||||
* Removes any values found which are CharSequence with a length of 0.
|
||||
* (ie: empty strings)
|
||||
* <p>
|
||||
* By default this processor applies itself to all fields.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* For example, with the configuration listed below, blank strings will be
|
||||
* removed from all fields except those whose name ends with
|
||||
* "<code>_literal</code>".
|
||||
* </p>
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* <processor class="solr.RemoveBlankFieldUpdateProcessorFactory">
|
||||
* <lst name="exclude">
|
||||
* <str name="fieldRegex">.*_literal</str>
|
||||
* </lst>
|
||||
* </processor>
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
public final class RemoveBlankFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void init(NamedList args) {
|
||||
// no trim specific init args
|
||||
super.init(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
|
||||
SolrQueryResponse rsp,
|
||||
UpdateRequestProcessor next) {
|
||||
return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
|
||||
protected Object mutateValue(final Object src) {
|
||||
if (src instanceof CharSequence
|
||||
&& 0 == ((CharSequence)src).length()) {
|
||||
return DELETE_VALUE_SINGLETON;
|
||||
}
|
||||
return src;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
|
||||
|
||||
/**
|
||||
* Trims leading and trailing whitespace from any CharSequence values
|
||||
* found in fields matching the specified conditions and returns the
|
||||
* resulting String.
|
||||
* <p>
|
||||
* By default this processor matches all fields
|
||||
* </p>
|
||||
*
|
||||
* <p>For example, with the configuration listed below, all String field values
|
||||
* will have leading and trailing spaces removed except for fields whose
|
||||
* name ends with "<code>_literal</code>".
|
||||
* </p>
|
||||
* <pre class="prettyprint">
|
||||
* <processor class="solr.TrimFieldUpdateProcessorFactory">
|
||||
* <lst name="exclude">
|
||||
* <str name="fieldRegex">.*_literal</str>
|
||||
* </lst>
|
||||
* </processor>
|
||||
* </pre>
|
||||
*/
|
||||
public final class TrimFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void init(NamedList args) {
|
||||
// no trim specific init args
|
||||
super.init(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
|
||||
SolrQueryResponse rsp,
|
||||
UpdateRequestProcessor next) {
|
||||
return new FieldValueMutatingUpdateProcessor(getSelector(), next) {
|
||||
protected Object mutateValue(final Object src) {
|
||||
if (src instanceof CharSequence) {
|
||||
return ((CharSequence)src).toString().trim();
|
||||
}
|
||||
return src;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,235 @@
|
|||
<?xml version="1.0" ?>
|
||||
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!--
|
||||
Test Config that enumerates many different update processor chain
|
||||
configurations.
|
||||
|
||||
-->
|
||||
<config>
|
||||
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
|
||||
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
|
||||
|
||||
<updateRequestProcessorChain name="comprehensive">
|
||||
<processor class="solr.FieldLengthUpdateProcessorFactory">
|
||||
<arr name="typeClass">
|
||||
<str>solr.TrieIntField</str>
|
||||
<str>solr.TrieLongField</str>
|
||||
</arr>
|
||||
</processor>
|
||||
<processor class="solr.MinFieldValueUpdateProcessorFactory">
|
||||
<str name="fieldName">min_foo_l</str>
|
||||
</processor>
|
||||
<processor class="solr.MaxFieldValueUpdateProcessorFactory">
|
||||
<str name="fieldName">max_foo_l</str>
|
||||
</processor>
|
||||
<processor class="solr.ConcatFieldUpdateProcessorFactory">
|
||||
<str name="delimiter">; </str>
|
||||
<lst name="exclude">
|
||||
<str name="fieldName">primary_author_s1</str>
|
||||
</lst>
|
||||
</processor>
|
||||
<processor class="solr.FirstFieldValueUpdateProcessorFactory">
|
||||
<str name="fieldName">primary_author_s1</str>
|
||||
<str name="fieldName">first_foo_l</str>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
|
||||
<updateRequestProcessorChain name="trim-all">
|
||||
<!-- no specific src field configs, so all fields should get trimmed -->
|
||||
<processor class="solr.TrimFieldUpdateProcessorFactory">
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="trim-field">
|
||||
<processor class="solr.TrimFieldUpdateProcessorFactory">
|
||||
<str name="fieldName">foo_t</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
|
||||
<updateRequestProcessorChain name="trim-most">
|
||||
<!-- all fields except the exclusions should be trimmed -->
|
||||
<processor class="solr.TrimFieldUpdateProcessorFactory">
|
||||
<lst name="exclude">
|
||||
<str name="fieldName">foo_t</str>
|
||||
</lst>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
<updateRequestProcessorChain name="trim-many">
|
||||
<processor class="solr.TrimFieldUpdateProcessorFactory">
|
||||
<str name="fieldRegex">foo.*</str>
|
||||
<str name="fieldRegex">bar.*</str>
|
||||
<lst name="exclude">
|
||||
<str name="fieldRegex">.*HOSS.*</str>
|
||||
</lst>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
<updateRequestProcessorChain name="trim-few">
|
||||
<processor class="solr.TrimFieldUpdateProcessorFactory">
|
||||
<str name="fieldRegex">foo.*</str>
|
||||
<str name="fieldRegex">bar.*</str>
|
||||
<!-- each set of exclusions is checked independently -->
|
||||
<lst name="exclude">
|
||||
<str name="typeClass">solr.DateField</str>
|
||||
</lst>
|
||||
<lst name="exclude">
|
||||
<str name="fieldRegex">.*HOSS.*</str>
|
||||
</lst>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
<updateRequestProcessorChain name="trim-some">
|
||||
<processor class="solr.TrimFieldUpdateProcessorFactory">
|
||||
<str name="fieldRegex">foo.*</str>
|
||||
<str name="fieldRegex">bar.*</str>
|
||||
<!-- only excluded if it matches all in set -->
|
||||
<lst name="exclude">
|
||||
<str name="typeClass">solr.DateField</str>
|
||||
<str name="fieldRegex">.*HOSS.*</str>
|
||||
</lst>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="trim-fields">
|
||||
<processor class="solr.TrimFieldUpdateProcessorFactory">
|
||||
<str name="fieldName">name</str>
|
||||
<str name="fieldName">foo_t</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
<updateRequestProcessorChain name="trim-fields-arr">
|
||||
<processor class="solr.TrimFieldUpdateProcessorFactory">
|
||||
<arr name="fieldName">
|
||||
<str>name</str>
|
||||
<str>foo_t</str>
|
||||
</arr>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="trim-field-regexes">
|
||||
<processor class="solr.TrimFieldUpdateProcessorFactory">
|
||||
<str name="fieldRegex">foo.*</str>
|
||||
<str name="fieldRegex">bar.*_s</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="trim-types">
|
||||
<processor class="solr.TrimFieldUpdateProcessorFactory">
|
||||
<str name="typeName">nametext</str>
|
||||
<str name="typeName">text_sw</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="trim-classes">
|
||||
<processor class="solr.TrimFieldUpdateProcessorFactory">
|
||||
<str name="typeClass">solr.DateField</str>
|
||||
<str name="typeClass">solr.StrField</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="trim-multi">
|
||||
<processor class="solr.TrimFieldUpdateProcessorFactory">
|
||||
<str name="typeClass">solr.DateField</str>
|
||||
<str name="typeClass">solr.StrField</str>
|
||||
<arr name="fieldRegex">
|
||||
<str>foo.*</str>
|
||||
</arr>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="remove-all-blanks">
|
||||
<processor class="solr.RemoveBlankFieldUpdateProcessorFactory">
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="length-none">
|
||||
<processor class="solr.FieldLengthUpdateProcessorFactory">
|
||||
<!-- by default, the processor doesn't touch anything -->
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
<updateRequestProcessorChain name="length-some">
|
||||
<processor class="solr.FieldLengthUpdateProcessorFactory">
|
||||
<arr name="fieldRegex">
|
||||
<str>foo.*</str>
|
||||
<str>yak.*</str>
|
||||
</arr>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="concat-defaults">
|
||||
<processor class="solr.ConcatFieldUpdateProcessorFactory">
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
<updateRequestProcessorChain name="concat-field">
|
||||
<processor class="solr.ConcatFieldUpdateProcessorFactory">
|
||||
<str name="fieldName">foo_s</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
<updateRequestProcessorChain name="concat-type-delim">
|
||||
<processor class="solr.ConcatFieldUpdateProcessorFactory">
|
||||
<str name="typeName">string</str>
|
||||
<str name="delimiter">; </str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="first-value">
|
||||
<processor class="solr.FirstFieldValueUpdateProcessorFactory">
|
||||
<str name="fieldName">foo_s</str>
|
||||
<str name="fieldName">bar_s</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
<updateRequestProcessorChain name="last-value">
|
||||
<processor class="solr.LastFieldValueUpdateProcessorFactory">
|
||||
<str name="fieldName">foo_s</str>
|
||||
<str name="fieldName">bar_s</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
<updateRequestProcessorChain name="min-value">
|
||||
<processor class="solr.MinFieldValueUpdateProcessorFactory">
|
||||
<str name="fieldName">foo_i</str>
|
||||
<str name="fieldName">foo_s</str>
|
||||
<str name="fieldName">bar_s</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
<updateRequestProcessorChain name="max-value">
|
||||
<processor class="solr.MaxFieldValueUpdateProcessorFactory">
|
||||
<str name="fieldName">foo_i</str>
|
||||
<str name="fieldName">foo_s</str>
|
||||
<str name="fieldName">bar_s</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="html-strip">
|
||||
<processor class="solr.HTMLStripFieldUpdateProcessorFactory">
|
||||
<str name="fieldName">html_s</str>
|
||||
</processor>
|
||||
<processor class="solr.TrimFieldUpdateProcessorFactory"/>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="regex-replace">
|
||||
<processor class="solr.RegexReplaceProcessorFactory">
|
||||
<str name="fieldName">content</str>
|
||||
<str name="fieldName">title</str>
|
||||
<str name="pattern">\s+</str>
|
||||
<str name="replacement">X</str>
|
||||
</processor>
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
</config>
|
|
@ -0,0 +1,674 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.TreeSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Arrays;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
|
||||
import org.apache.solr.update.AddUpdateCommand;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
|
||||
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Tests the basics of configuring FieldMutatingUpdateProcessors
|
||||
* (mainly via TrimFieldUpdateProcessor) and the logic of other various
|
||||
* subclasses.
|
||||
*/
|
||||
public class FieldMutatingUpdateProcessorTest extends SolrTestCaseJ4 {
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-update-processor-chains.xml", "schema12.xml");
|
||||
}
|
||||
|
||||
public void testComprehensive() throws Exception {
|
||||
|
||||
final String countMe = "how long is this string?";
|
||||
final int count = countMe.length();
|
||||
|
||||
processAdd("comprehensive",
|
||||
doc(f("id", "1111"),
|
||||
f("primary_author_s1",
|
||||
"XXXX", "Adam", "Sam"),
|
||||
f("all_authors_s1",
|
||||
"XXXX", "Adam", "Sam"),
|
||||
f("foo_is", countMe, new Integer(42)),
|
||||
f("first_foo_l", countMe, new Integer(-34)),
|
||||
f("max_foo_l", countMe, new Integer(-34)),
|
||||
f("min_foo_l", countMe, new Integer(-34))));
|
||||
|
||||
assertU(commit());
|
||||
|
||||
assertQ(req("id:1111")
|
||||
,"//str[@name='primary_author_s1'][.='XXXX']"
|
||||
,"//str[@name='all_authors_s1'][.='XXXX; Adam; Sam']"
|
||||
,"//arr[@name='foo_is']/int[1][.='"+count+"']"
|
||||
,"//arr[@name='foo_is']/int[2][.='42']"
|
||||
,"//long[@name='max_foo_l'][.='"+count+"']"
|
||||
,"//long[@name='first_foo_l'][.='"+count+"']"
|
||||
,"//long[@name='min_foo_l'][.='-34']"
|
||||
);
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void testTrimAll() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
|
||||
d = processAdd("trim-all",
|
||||
doc(f("id", "1111"),
|
||||
f("name", " Hoss ", new StringBuilder(" Man")),
|
||||
f("foo_t", " some text ", "other Text\t"),
|
||||
f("foo_d", new Integer(42)),
|
||||
field("foo_s", 5.0F, " string ")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
// simple stuff
|
||||
assertEquals("string", d.getFieldValue("foo_s"));
|
||||
assertEquals(Arrays.asList("some text","other Text"),
|
||||
d.getFieldValues("foo_t"));
|
||||
assertEquals(Arrays.asList("Hoss","Man"),
|
||||
d.getFieldValues("name"));
|
||||
|
||||
// slightly more interesting
|
||||
assertEquals("processor borked non string value",
|
||||
new Integer(42), d.getFieldValue("foo_d"));
|
||||
assertEquals("wrong boost",
|
||||
5.0F, d.getField("foo_s").getBoost(), 0.0F);
|
||||
}
|
||||
|
||||
public void testTrimFields() throws Exception {
|
||||
for (String chain : Arrays.asList("trim-fields", "trim-fields-arr")) {
|
||||
SolrInputDocument d = null;
|
||||
d = processAdd(chain,
|
||||
doc(f("id", "1111"),
|
||||
f("name", " Hoss ", " Man"),
|
||||
f("foo_t", " some text ", "other Text\t"),
|
||||
f("foo_s", " string ")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(" string ", d.getFieldValue("foo_s"));
|
||||
assertEquals(Arrays.asList("some text","other Text"),
|
||||
d.getFieldValues("foo_t"));
|
||||
assertEquals(Arrays.asList("Hoss","Man"),
|
||||
d.getFieldValues("name"));
|
||||
}
|
||||
}
|
||||
|
||||
public void testTrimField() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
d = processAdd("trim-field",
|
||||
doc(f("id", "1111"),
|
||||
f("name", " Hoss ", " Man"),
|
||||
f("foo_t", " some text ", "other Text\t"),
|
||||
f("foo_s", " string ")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(" string ", d.getFieldValue("foo_s"));
|
||||
assertEquals(Arrays.asList("some text","other Text"),
|
||||
d.getFieldValues("foo_t"));
|
||||
assertEquals(Arrays.asList(" Hoss "," Man"),
|
||||
d.getFieldValues("name"));
|
||||
}
|
||||
|
||||
public void testTrimRegex() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
d = processAdd("trim-field-regexes",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_t", " string1 "),
|
||||
f("foozat_s", " string2 "),
|
||||
f("bar_t", " string3 "),
|
||||
f("bar_s", " string4 ")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals("string1", d.getFieldValue("foo_t"));
|
||||
assertEquals("string2", d.getFieldValue("foozat_s"));
|
||||
assertEquals(" string3 ", d.getFieldValue("bar_t"));
|
||||
assertEquals("string4", d.getFieldValue("bar_s"));
|
||||
|
||||
}
|
||||
|
||||
public void testTrimTypes() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
d = processAdd("trim-types",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_sw", " string0 "),
|
||||
f("name", " string1 "),
|
||||
f("title", " string2 "),
|
||||
f("bar_t", " string3 "),
|
||||
f("bar_s", " string4 ")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals("string0", d.getFieldValue("foo_sw"));
|
||||
assertEquals("string1", d.getFieldValue("name"));
|
||||
assertEquals("string2", d.getFieldValue("title"));
|
||||
assertEquals(" string3 ", d.getFieldValue("bar_t"));
|
||||
assertEquals(" string4 ", d.getFieldValue("bar_s"));
|
||||
|
||||
}
|
||||
|
||||
public void testTrimClasses() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
d = processAdd("trim-classes",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_t", " string1 "),
|
||||
f("foo_s", " string2 "),
|
||||
f("bar_dt", " string3 "),
|
||||
f("bar_pdt", " string4 ")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(" string1 ", d.getFieldValue("foo_t"));
|
||||
assertEquals("string2", d.getFieldValue("foo_s"));
|
||||
assertEquals("string3", d.getFieldValue("bar_dt"));
|
||||
assertEquals("string4", d.getFieldValue("bar_pdt"));
|
||||
|
||||
}
|
||||
|
||||
public void testTrimMultipleRules() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
d = processAdd("trim-multi",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_t", " string1 "),
|
||||
f("foo_s", " string2 "),
|
||||
f("bar_dt", " string3 "),
|
||||
f("foo_pdt", " string4 ")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(" string1 ", d.getFieldValue("foo_t"));
|
||||
assertEquals("string2", d.getFieldValue("foo_s"));
|
||||
assertEquals(" string3 ", d.getFieldValue("bar_dt"));
|
||||
assertEquals("string4", d.getFieldValue("foo_pdt"));
|
||||
|
||||
}
|
||||
|
||||
public void testTrimExclusions() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
d = processAdd("trim-most",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_t", " string1 "),
|
||||
f("foo_s", " string2 "),
|
||||
f("bar_dt", " string3 "),
|
||||
f("foo_pdt", " string4 ")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(" string1 ", d.getFieldValue("foo_t"));
|
||||
assertEquals("string2", d.getFieldValue("foo_s"));
|
||||
assertEquals("string3", d.getFieldValue("bar_dt"));
|
||||
assertEquals("string4", d.getFieldValue("foo_pdt"));
|
||||
|
||||
d = processAdd("trim-many",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_t", " string1 "),
|
||||
f("foo_s", " string2 "),
|
||||
f("bar_dt", " string3 "),
|
||||
f("bar_HOSS_s", " string4 "),
|
||||
f("foo_pdt", " string5 "),
|
||||
f("foo_HOSS_pdt", " string6 ")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals("string1", d.getFieldValue("foo_t"));
|
||||
assertEquals("string2", d.getFieldValue("foo_s"));
|
||||
assertEquals("string3", d.getFieldValue("bar_dt"));
|
||||
assertEquals(" string4 ", d.getFieldValue("bar_HOSS_s"));
|
||||
assertEquals("string5", d.getFieldValue("foo_pdt"));
|
||||
assertEquals(" string6 ", d.getFieldValue("foo_HOSS_pdt"));
|
||||
|
||||
d = processAdd("trim-few",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_t", " string1 "),
|
||||
f("foo_s", " string2 "),
|
||||
f("bar_dt", " string3 "),
|
||||
f("bar_HOSS_s", " string4 "),
|
||||
f("foo_pdt", " string5 "),
|
||||
f("foo_HOSS_pdt", " string6 ")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals("string1", d.getFieldValue("foo_t"));
|
||||
assertEquals("string2", d.getFieldValue("foo_s"));
|
||||
assertEquals(" string3 ", d.getFieldValue("bar_dt"));
|
||||
assertEquals(" string4 ", d.getFieldValue("bar_HOSS_s"));
|
||||
assertEquals(" string5 ", d.getFieldValue("foo_pdt"));
|
||||
assertEquals(" string6 ", d.getFieldValue("foo_HOSS_pdt"));
|
||||
|
||||
d = processAdd("trim-some",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_t", " string1 "),
|
||||
f("foo_s", " string2 "),
|
||||
f("bar_dt", " string3 "),
|
||||
f("bar_HOSS_s", " string4 "),
|
||||
f("foo_pdt", " string5 "),
|
||||
f("foo_HOSS_pdt", " string6 ")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals("string1", d.getFieldValue("foo_t"));
|
||||
assertEquals("string2", d.getFieldValue("foo_s"));
|
||||
assertEquals("string3", d.getFieldValue("bar_dt"));
|
||||
assertEquals("string4", d.getFieldValue("bar_HOSS_s"));
|
||||
assertEquals("string5", d.getFieldValue("foo_pdt"));
|
||||
assertEquals(" string6 ", d.getFieldValue("foo_HOSS_pdt"));
|
||||
}
|
||||
|
||||
public void testRemoveBlanks() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
d = processAdd("remove-all-blanks",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_s", "string1", ""),
|
||||
f("bar_dt", "string2", "", "string3"),
|
||||
f("yak_t", ""),
|
||||
f("foo_d", new Integer(42))));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(Arrays.asList("string1"),
|
||||
d.getFieldValues("foo_s"));
|
||||
assertEquals(Arrays.asList("string2","string3"),
|
||||
d.getFieldValues("bar_dt"));
|
||||
assertFalse("shouldn't be any values for yak_t",
|
||||
d.containsKey("yak_t"));
|
||||
assertEquals("processor borked non string value",
|
||||
new Integer(42), d.getFieldValue("foo_d"));
|
||||
|
||||
}
|
||||
|
||||
public void testStrLength() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
d = processAdd("length-none",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_s", "string1", "string222"),
|
||||
f("bar_dt", "string3"),
|
||||
f("yak_t", ""),
|
||||
f("foo_d", new Integer(42))));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(Arrays.asList("string1","string222"),
|
||||
d.getFieldValues("foo_s"));
|
||||
assertEquals("string3", d.getFieldValue("bar_dt"));
|
||||
assertEquals("", d.getFieldValue("yak_t"));
|
||||
assertEquals("processor borked non string value",
|
||||
new Integer(42), d.getFieldValue("foo_d"));
|
||||
|
||||
d = processAdd("length-some",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_s", "string1", "string222"),
|
||||
f("bar_dt", "string3"),
|
||||
f("yak_t", ""),
|
||||
f("foo_d", new Integer(42))));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(Arrays.asList(new Integer(7), new Integer(9)),
|
||||
d.getFieldValues("foo_s"));
|
||||
assertEquals("string3", d.getFieldValue("bar_dt"));
|
||||
assertEquals(new Integer(0), d.getFieldValue("yak_t"));
|
||||
assertEquals("processor borked non string value",
|
||||
new Integer(42), d.getFieldValue("foo_d"));
|
||||
}
|
||||
|
||||
public void testRegexReplace() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
d = processAdd("regex-replace",
|
||||
doc(f("id", "doc1"),
|
||||
f("content", "This is a text\t with a lot\n of whitespace"),
|
||||
f("title", "This\ttitle has a lot of spaces")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals("ThisXisXaXtextXwithXaXlotXofXwhitespace",
|
||||
d.getFieldValue("content"));
|
||||
assertEquals("ThisXtitleXhasXaXlotXofXspaces",
|
||||
d.getFieldValue("title"));
|
||||
}
|
||||
|
||||
public void testFirstValue() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
|
||||
d = processAdd("first-value",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_s", "string1", "string222"),
|
||||
f("bar_s", "string3"),
|
||||
f("yak_t", "string4", "string5")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(Arrays.asList("string1"),
|
||||
d.getFieldValues("foo_s"));
|
||||
assertEquals(Arrays.asList("string3"),
|
||||
d.getFieldValues("bar_s"));
|
||||
assertEquals(Arrays.asList("string4", "string5"),
|
||||
d.getFieldValues("yak_t"));
|
||||
}
|
||||
|
||||
public void testLastValue() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
|
||||
// basics
|
||||
|
||||
d = processAdd("last-value",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_s", "string1", "string222"),
|
||||
f("bar_s", "string3"),
|
||||
f("yak_t", "string4", "string5")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(Arrays.asList("string222"),
|
||||
d.getFieldValues("foo_s"));
|
||||
assertEquals(Arrays.asList("string3"),
|
||||
d.getFieldValues("bar_s"));
|
||||
assertEquals(Arrays.asList("string4", "string5"),
|
||||
d.getFieldValues("yak_t"));
|
||||
|
||||
// test optimizations (and force test of defaults)
|
||||
|
||||
SolrInputField special = null;
|
||||
|
||||
// test something that's definitely a SortedSet
|
||||
|
||||
special = new SolrInputField("foo_s");
|
||||
special.setValue(new TreeSet<String>
|
||||
(Arrays.asList("ggg", "first", "last", "hhh")), 1.2F);
|
||||
|
||||
d = processAdd("last-value",
|
||||
doc(f("id", "1111"),
|
||||
special));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals("last", d.getFieldValue("foo_s"));
|
||||
|
||||
// test something that's definitely a List
|
||||
|
||||
special = new SolrInputField("foo_s");
|
||||
special.setValue(Arrays.asList("first", "ggg", "hhh", "last"), 1.2F);
|
||||
|
||||
d = processAdd("last-value",
|
||||
doc(f("id", "1111"),
|
||||
special));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals("last", d.getFieldValue("foo_s"));
|
||||
|
||||
// test something that is definitely not a List or SortedSet
|
||||
// (ie: get default behavior of Collection using iterator)
|
||||
|
||||
special = new SolrInputField("foo_s");
|
||||
special.setValue(new LinkedHashSet<String>
|
||||
(Arrays.asList("first", "ggg", "hhh", "last")), 1.2F);
|
||||
|
||||
d = processAdd("last-value",
|
||||
doc(f("id", "1111"),
|
||||
special));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals("last", d.getFieldValue("foo_s"));
|
||||
|
||||
|
||||
}
|
||||
|
||||
public void testMinValue() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
|
||||
d = processAdd("min-value",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_s", "zzz", "aaa", "bbb"),
|
||||
f("foo_i", 42, 128, -3),
|
||||
f("bar_s", "aaa"),
|
||||
f("yak_t", "aaa", "bbb")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(Arrays.asList("aaa"),
|
||||
d.getFieldValues("foo_s"));
|
||||
assertEquals(Arrays.asList(-3),
|
||||
d.getFieldValues("foo_i"));
|
||||
assertEquals(Arrays.asList("aaa"),
|
||||
d.getFieldValues("bar_s"));
|
||||
assertEquals(Arrays.asList("aaa", "bbb"),
|
||||
d.getFieldValues("yak_t"));
|
||||
|
||||
// uncomparable should not fail
|
||||
|
||||
d = processAdd("min-value",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_s", "zzz", new Integer(42), "bbb"),
|
||||
f("bar_s", "aaa"),
|
||||
f("yak_t", "aaa", "bbb")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(Arrays.asList("zzz", new Integer(42), "bbb"),
|
||||
d.getFieldValues("foo_s"));
|
||||
assertEquals(Arrays.asList("aaa"),
|
||||
d.getFieldValues("bar_s"));
|
||||
assertEquals(Arrays.asList("aaa", "bbb"),
|
||||
d.getFieldValues("yak_t"));
|
||||
|
||||
|
||||
}
|
||||
|
||||
public void testMaxValue() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
|
||||
d = processAdd("max-value",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_s", "zzz", "aaa", "bbb"),
|
||||
f("foo_i", 42, 128, -3),
|
||||
f("bar_s", "aaa"),
|
||||
f("yak_t", "aaa", "bbb")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(Arrays.asList("zzz"),
|
||||
d.getFieldValues("foo_s"));
|
||||
assertEquals(Arrays.asList(128),
|
||||
d.getFieldValues("foo_i"));
|
||||
assertEquals(Arrays.asList("aaa"),
|
||||
d.getFieldValues("bar_s"));
|
||||
assertEquals(Arrays.asList("aaa", "bbb"),
|
||||
d.getFieldValues("yak_t"));
|
||||
|
||||
// uncomparable should not fail
|
||||
|
||||
d = processAdd("max-value",
|
||||
doc(f("id", "1111"),
|
||||
f("foo_s", "zzz", new Integer(42), "bbb"),
|
||||
f("bar_s", "aaa"),
|
||||
f("yak_t", "aaa", "bbb")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(Arrays.asList("zzz", new Integer(42), "bbb"),
|
||||
d.getFieldValues("foo_s"));
|
||||
assertEquals(Arrays.asList("aaa"),
|
||||
d.getFieldValues("bar_s"));
|
||||
assertEquals(Arrays.asList("aaa", "bbb"),
|
||||
d.getFieldValues("yak_t"));
|
||||
|
||||
|
||||
}
|
||||
|
||||
public void testHtmlStrip() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
|
||||
d = processAdd("html-strip",
|
||||
doc(f("id", "1111"),
|
||||
f("html_s", "<body>hi & bye", "aaa", "bbb"),
|
||||
f("bar_s", "<body>hi & bye")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(Arrays.asList("hi & bye", "aaa", "bbb"),
|
||||
d.getFieldValues("html_s"));
|
||||
assertEquals("<body>hi & bye", d.getFieldValue("bar_s"));
|
||||
|
||||
}
|
||||
|
||||
public void testConcatDefaults() throws Exception {
|
||||
SolrInputDocument d = null;
|
||||
d = processAdd("concat-defaults",
|
||||
doc(f("id", "1111", "222"),
|
||||
f("attr_foo", "string1", "string2"),
|
||||
f("foo_s1", "string3", "string4"),
|
||||
f("bar_dt", "string5", "string6"),
|
||||
f("bar_HOSS_s", "string7", "string8"),
|
||||
f("foo_d", new Integer(42))));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals("1111, 222", d.getFieldValue("id"));
|
||||
assertEquals(Arrays.asList("string1","string2"),
|
||||
d.getFieldValues("attr_foo"));
|
||||
assertEquals("string3, string4", d.getFieldValue("foo_s1"));
|
||||
assertEquals(Arrays.asList("string5","string6"),
|
||||
d.getFieldValues("bar_dt"));
|
||||
assertEquals(Arrays.asList("string7","string8"),
|
||||
d.getFieldValues("bar_HOSS_s"));
|
||||
assertEquals("processor borked non string value",
|
||||
new Integer(42), d.getFieldValue("foo_d"));
|
||||
|
||||
}
|
||||
|
||||
public void testConcatExplicit() throws Exception {
|
||||
doSimpleDelimTest("concat-field", ", ");
|
||||
}
|
||||
public void testConcatExplicitWithDelim() throws Exception {
|
||||
doSimpleDelimTest("concat-type-delim", "; ");
|
||||
}
|
||||
private void doSimpleDelimTest(final String chain, final String delim)
|
||||
throws Exception {
|
||||
|
||||
SolrInputDocument d = null;
|
||||
d = processAdd(chain,
|
||||
doc(f("id", "1111"),
|
||||
f("foo_t", "string1", "string2"),
|
||||
f("foo_d", new Integer(42)),
|
||||
field("foo_s", 3.0F, "string3", "string4")));
|
||||
|
||||
assertNotNull(d);
|
||||
|
||||
assertEquals(Arrays.asList("string1","string2"),
|
||||
d.getFieldValues("foo_t"));
|
||||
assertEquals("string3" + delim + "string4", d.getFieldValue("foo_s"));
|
||||
|
||||
// slightly more interesting
|
||||
assertEquals("processor borked non string value",
|
||||
new Integer(42), d.getFieldValue("foo_d"));
|
||||
assertEquals("wrong boost",
|
||||
3.0F, d.getField("foo_s").getBoost(), 0.0F);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convinience method for building up SolrInputDocuments
|
||||
*/
|
||||
SolrInputDocument doc(SolrInputField... fields) {
|
||||
SolrInputDocument d = new SolrInputDocument();
|
||||
for (SolrInputField f : fields) {
|
||||
d.put(f.getName(), f);
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convinience method for building up SolrInputFields
|
||||
*/
|
||||
SolrInputField field(String name, float boost, Object... values) {
|
||||
SolrInputField f = new SolrInputField(name);
|
||||
for (Object v : values) {
|
||||
f.addValue(v, 1.0F);
|
||||
}
|
||||
f.setBoost(boost);
|
||||
return f;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convinience method for building up SolrInputFields with default boost
|
||||
*/
|
||||
SolrInputField f(String name, Object... values) {
|
||||
return field(name, 1.0F, values);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Runs a document through the specified chain, and returns the final
|
||||
* document used when the chain is completed (NOTE: some chains may
|
||||
* modifiy the document in place
|
||||
*/
|
||||
SolrInputDocument processAdd(final String chain,
|
||||
final SolrInputDocument docIn)
|
||||
throws IOException {
|
||||
|
||||
SolrCore core = h.getCore();
|
||||
UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
|
||||
assertNotNull("No Chain named: " + chain, pc);
|
||||
|
||||
SolrQueryResponse rsp = new SolrQueryResponse();
|
||||
|
||||
SolrQueryRequest req = new LocalSolrQueryRequest
|
||||
(core, new ModifiableSolrParams());
|
||||
try {
|
||||
AddUpdateCommand cmd = new AddUpdateCommand(req);
|
||||
cmd.solrDoc = docIn;
|
||||
|
||||
UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
|
||||
processor.processAdd(cmd);
|
||||
|
||||
return cmd.solrDoc;
|
||||
} finally {
|
||||
req.close();
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue