SOLR-4894: Add AddSchemaFieldsUpdateProcessorFactory: dynamically add fields to the schema if an input document contains unknown fields

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1498555 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Steven Rowe 2013-07-01 16:34:09 +00:00
parent d96faa4a3a
commit ff5f4d5be8
8 changed files with 819 additions and 23 deletions

View File

@ -73,6 +73,9 @@ public interface SolrQueryRequest {
/** The schema snapshot from core.getLatestSchema() at request creation. */ /** The schema snapshot from core.getLatestSchema() at request creation. */
public IndexSchema getSchema(); public IndexSchema getSchema();
/**
 * Replaces the current schema snapshot with the latest from the core.
 * After this call, {@link #getSchema()} is expected to return the refreshed
 * snapshot rather than the one captured at request creation.
 */
public void updateSchemaToLatest();
/** /**
* Returns a string representing all the important parameters. * Returns a string representing all the important parameters.

View File

@ -42,8 +42,8 @@ import java.util.HashMap;
*/ */
public abstract class SolrQueryRequestBase implements SolrQueryRequest { public abstract class SolrQueryRequestBase implements SolrQueryRequest {
protected final SolrCore core; protected final SolrCore core;
protected final IndexSchema schema;
protected final SolrParams origParams; protected final SolrParams origParams;
protected volatile IndexSchema schema;
protected SolrParams params; protected SolrParams params;
protected Map<Object,Object> context; protected Map<Object,Object> context;
protected Iterable<ContentStream> streams; protected Iterable<ContentStream> streams;
@ -112,6 +112,11 @@ public abstract class SolrQueryRequestBase implements SolrQueryRequest {
return schema; return schema;
} }
/**
 * Swaps this request's schema snapshot for whatever schema the core
 * currently reports as its latest.
 */
@Override
public void updateSchemaToLatest() {
  final IndexSchema latestSchema = core.getLatestSchema();
  this.schema = latestSchema;
}
/** /**
* Frees resources associated with this request, this method <b>must</b> * Frees resources associated with this request, this method <b>must</b>
* be called when the object is no longer in use. * be called when the object is no longer in use.

View File

@ -169,6 +169,12 @@ public final class ManagedIndexSchema extends IndexSchema {
return addFields(Arrays.asList(newField)); return addFields(Arrays.asList(newField));
} }
public class FieldExistsException extends SolrException {
public FieldExistsException(ErrorCode code, String msg) {
super(code, msg);
}
}
@Override @Override
public ManagedIndexSchema addFields(Collection<SchemaField> newFields) { public ManagedIndexSchema addFields(Collection<SchemaField> newFields) {
ManagedIndexSchema newSchema = null; ManagedIndexSchema newSchema = null;
@ -183,7 +189,7 @@ public final class ManagedIndexSchema extends IndexSchema {
for (SchemaField newField : newFields) { for (SchemaField newField : newFields) {
if (null != newSchema.getFieldOrNull(newField.getName())) { if (null != newSchema.getFieldOrNull(newField.getName())) {
String msg = "Field '" + newField.getName() + "' already exists."; String msg = "Field '" + newField.getName() + "' already exists.";
throw new SolrException(ErrorCode.BAD_REQUEST, msg); throw new FieldExistsException(ErrorCode.BAD_REQUEST, msg);
} }
newSchema.fields.put(newField.getName(), newField); newSchema.fields.put(newField.getName(), newField);

View File

@ -0,0 +1,349 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.ManagedIndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.FieldMutatingUpdateProcessorFactory.SelectorParams;
import org.apache.solr.update.processor.FieldMutatingUpdateProcessor.FieldNameSelector;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.apache.solr.common.SolrException.ErrorCode.BAD_REQUEST;
import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
/**
* <p>
* This processor will dynamically add fields to the schema if an input document contains
* one or more fields that don't match any field or dynamic field in the schema.
* </p>
* <p>
* By default, this processor selects all fields that don't match a schema field or
* dynamic field. The "fieldName" and "fieldRegex" selectors may be specified to further
* restrict the selected fields, but the other selectors ("typeName", "typeClass", and
* "fieldNameMatchesSchemaField") may not be specified.
* </p>
* <p>
* This processor is configured to map from each field's values' class(es) to the schema
* field type that will be used when adding the new field to the schema. All new fields
* are then added to the schema in a single batch. If schema addition fails for any
* field, addition is re-attempted only for those that don't match any schema
* field. This process is repeated, either until all new fields are successfully added,
* or until there are no new fields (presumably because the fields that were new when
* this processor started its work were subsequently added by a different update
* request, possibly on a different node).
* </p>
* <p>
* This processor takes as configuration a sequence of zero or more "typeMapping"-s from
* one or more "valueClass"-s, specified as either an &lt;arr&gt; of &lt;str&gt;, or
* multiple &lt;str&gt; with the same name, to an existing schema "fieldType".
* </p>
* <p>
* If more than one "valueClass" is specified in a "typeMapping", field values with any
* of the specified "valueClass"-s will be mapped to the specified target "fieldType".
* The "typeMapping"-s are attempted in the specified order; if a field value's class
* is not specified in a "valueClass", the next "typeMapping" is attempted. If no
* "typeMapping" succeeds, then the specified "defaultFieldType" is used.
* </p>
* <p>
* Example configuration:
* </p>
*
* <pre class="prettyprint">
* &lt;processor class="solr.AddSchemaFieldsUpdateProcessorFactory"&gt;
* &lt;str name="defaultFieldType"&gt;text_general&lt;/str&gt;
* &lt;lst name="typeMapping"&gt;
* &lt;str name="valueClass"&gt;java.lang.Boolean&lt;/str&gt;
* &lt;str name="fieldType"&gt;boolean&lt;/str&gt;
* &lt;/lst&gt;
* &lt;lst name="typeMapping"&gt;
* &lt;str name="valueClass"&gt;java.lang.Integer&lt;/str&gt;
* &lt;str name="fieldType"&gt;tint&lt;/str&gt;
* &lt;/lst&gt;
* &lt;lst name="typeMapping"&gt;
* &lt;str name="valueClass"&gt;java.lang.Float&lt;/str&gt;
* &lt;str name="fieldType"&gt;tfloat&lt;/str&gt;
* &lt;/lst&gt;
* &lt;lst name="typeMapping"&gt;
* &lt;str name="valueClass"&gt;java.util.Date&lt;/str&gt;
* &lt;str name="fieldType"&gt;tdate&lt;/str&gt;
* &lt;/lst&gt;
* &lt;lst name="typeMapping"&gt;
* &lt;str name="valueClass"&gt;java.lang.Long&lt;/str&gt;
* &lt;str name="valueClass"&gt;java.lang.Integer&lt;/str&gt;
* &lt;str name="fieldType"&gt;tlong&lt;/str&gt;
* &lt;/lst&gt;
* &lt;lst name="typeMapping"&gt;
* &lt;arr name="valueClass"&gt;
* &lt;str&gt;java.lang.Double&lt;/str&gt;
* &lt;str&gt;java.lang.Float&lt;/str&gt;
* &lt;/arr&gt;
* &lt;str name="fieldType"&gt;tdouble&lt;/str&gt;
* &lt;/lst&gt;
* &lt;/processor&gt;</pre>
*/
public class AddSchemaFieldsUpdateProcessorFactory extends UpdateRequestProcessorFactory implements SolrCoreAware {
// Logger shared by this factory and the processors it creates.
public final static Logger log = LoggerFactory.getLogger(AddSchemaFieldsUpdateProcessorFactory.class);
// Names of the init params read from this processor's configuration.
private static final String TYPE_MAPPING_PARAM = "typeMapping";
private static final String VALUE_CLASS_PARAM = "valueClass";
private static final String FIELD_TYPE_PARAM = "fieldType";
private static final String DEFAULT_FIELD_TYPE_PARAM = "defaultFieldType";
// Parsed "typeMapping" entries; replaced in init().
private List<TypeMapping> typeMappings = Collections.emptyList();
// Selector params parsed in init() that describe which field names to select.
private SelectorParams inclusions = new SelectorParams();
// Selector params parsed in init() from the "exclude" init params.
private Collection<SelectorParams> exclusions = new ArrayList<SelectorParams>();
// Built in inform(SolrCore); null until then.
private FieldNameSelector selector = null;
// Name of the field type returned when no typeMapping matches a field's values.
private String defaultFieldType;
/**
 * Returns the field name selector built by {@link #inform(SolrCore)}.
 *
 * @return the initialized selector, never null
 * @throws SolrException (SERVER_ERROR) if the selector has not been initialized
 */
protected final FieldMutatingUpdateProcessor.FieldNameSelector getSelector() {
  if (null == selector) {
    throw new SolrException(SERVER_ERROR, "selector was never initialized, inform(SolrCore) never called???");
  }
  return selector;
}
/**
 * Creates a new processor that adds unknown fields to the schema and then
 * delegates to <code>next</code>.
 */
@Override
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
                                          SolrQueryResponse rsp,
                                          UpdateRequestProcessor next) {
  final UpdateRequestProcessor processor = new AddSchemaFieldsUpdateProcessor(next);
  return processor;
}
/**
 * Reads this factory's configuration: the field selector params, the "exclude"
 * params, the required "defaultFieldType" and the "typeMapping" entries.
 *
 * @param args the init params; consumed entries are removed from it
 * @throws SolrException (SERVER_ERROR) if a disallowed selector param is present,
 *         or if "defaultFieldType" is missing or is not a &lt;str&gt;
 */
@Override
public void init(NamedList args) {
  inclusions = FieldMutatingUpdateProcessorFactory.parseSelectorParams(args);
  validateSelectorParams(inclusions);
  // Explicitly (non-configurably) require unknown field names
  inclusions.fieldNameMatchesSchemaField = false;

  exclusions = FieldMutatingUpdateProcessorFactory.parseSelectorExclusionParams(args);
  for (SelectorParams excluded : exclusions) {
    validateSelectorParams(excluded);
  }

  final Object defaultType = args.remove(DEFAULT_FIELD_TYPE_PARAM);
  if (null == defaultType) {
    throw new SolrException(SERVER_ERROR, "Missing required init param '" + DEFAULT_FIELD_TYPE_PARAM + "'");
  }
  if ( ! (defaultType instanceof CharSequence)) {
    throw new SolrException(SERVER_ERROR, "Init param '" + DEFAULT_FIELD_TYPE_PARAM + "' must be a <str>");
  }
  defaultFieldType = defaultType.toString();

  typeMappings = parseTypeMappings(args);
  super.init(args);
}
/**
 * Builds the field name selector from the inclusion and exclusion params parsed
 * in init(), then resolves each type mapping against the given core.
 *
 * @param core the core this factory belongs to
 */
@Override
public void inform(SolrCore core) {
  selector = FieldMutatingUpdateProcessor.createFieldNameSelector(
      core.getResourceLoader(), core, inclusions, getDefaultSelector(core));

  // Wrap the selector once per "exclude" entry
  for (SelectorParams exclusion : exclusions) {
    final FieldNameSelector excluded = FieldMutatingUpdateProcessor.createFieldNameSelector(
        core.getResourceLoader(), core, exclusion, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS);
    selector = FieldMutatingUpdateProcessor.wrap(selector, excluded);
  }

  for (TypeMapping mapping : typeMappings) {
    mapping.populateValueClasses(core);
  }
}
/**
 * Returns a selector that picks exactly those field names for which the core's
 * latest schema reports no field type.
 *
 * @param core the core whose latest schema is consulted on every call
 */
private FieldNameSelector getDefaultSelector(final SolrCore core) {
  return new FieldNameSelector() {
    @Override
    public boolean shouldMutate(final String fieldName) {
      final boolean knownToSchema = (null != core.getLatestSchema().getFieldTypeNoEx(fieldName));
      return ! knownToSchema;
    }
  };
}
/**
 * Parses all "typeMapping" init params into {@link TypeMapping} instances.
 * <p>
 * Each "typeMapping" must be a &lt;lst&gt; containing exactly one "fieldType" &lt;str&gt;
 * and at least one "valueClass"; any other sub-param is rejected. One "typeMapping"
 * entry is removed from <code>args</code> for each entry processed.
 * </p>
 *
 * @param args the processor's init params
 * @return the parsed type mappings, in the order they were read
 * @throws SolrException (SERVER_ERROR) if any "typeMapping" is malformed
 */
private static List<TypeMapping> parseTypeMappings(NamedList args) {
  List<TypeMapping> typeMappings = new ArrayList<TypeMapping>();
  List<Object> typeMappingsParams = args.getAll(TYPE_MAPPING_PARAM);
  for (Object typeMappingObj : typeMappingsParams) {
    if (null == typeMappingObj) {
      throw new SolrException(SERVER_ERROR, "'" + TYPE_MAPPING_PARAM + "' init param cannot be null");
    }
    if ( ! (typeMappingObj instanceof NamedList) ) {
      throw new SolrException(SERVER_ERROR, "'" + TYPE_MAPPING_PARAM + "' init param must be a <lst>");
    }
    NamedList typeMappingNamedList = (NamedList)typeMappingObj;

    Object fieldTypeObj = typeMappingNamedList.remove(FIELD_TYPE_PARAM);
    if (null == fieldTypeObj) {
      throw new SolrException(SERVER_ERROR,
          "Each '" + TYPE_MAPPING_PARAM + "' <lst/> must contain a '" + FIELD_TYPE_PARAM + "' <str>");
    }
    if ( ! (fieldTypeObj instanceof CharSequence)) {
      throw new SolrException(SERVER_ERROR, "'" + FIELD_TYPE_PARAM + "' init param must be a <str>");
    }
    // One 'fieldType' was consumed above; if another is still present, it is a duplicate.
    // (Relies on remove() dropping only a single entry.)
    if (null != typeMappingNamedList.get(FIELD_TYPE_PARAM)) {
      throw new SolrException(SERVER_ERROR,
          "Each '" + TYPE_MAPPING_PARAM + "' <lst/> may contain only one '" + FIELD_TYPE_PARAM + "' <str>");
    }
    String fieldType = fieldTypeObj.toString();

    Collection<String> valueClasses
        = FieldMutatingUpdateProcessorFactory.oneOrMany(typeMappingNamedList, VALUE_CLASS_PARAM);
    if (valueClasses.isEmpty()) {
      throw new SolrException(SERVER_ERROR,
          "Each '" + TYPE_MAPPING_PARAM + "' <lst/> must contain at least one '" + VALUE_CLASS_PARAM + "' <str>");
    }
    typeMappings.add(new TypeMapping(fieldType, valueClasses));

    // Anything still left in the <lst/> is an unrecognized sub-param
    if (0 != typeMappingNamedList.size()) {
      throw new SolrException(SERVER_ERROR,
          "Unexpected '" + TYPE_MAPPING_PARAM + "' init sub-param(s): '" + typeMappingNamedList.toString() + "'");
    }
    // Called once per processed entry
    args.remove(TYPE_MAPPING_PARAM);
  }
  return typeMappings;
}
/**
 * Rejects selector params this processor does not support: "typeName",
 * "typeClass" and "fieldNameMatchesSchemaField".
 *
 * @param params the parsed selector params to check
 * @throws SolrException (SERVER_ERROR) naming the first disallowed param found
 */
private void validateSelectorParams(SelectorParams params) {
  String problem = null;
  if ( ! params.typeName.isEmpty()) {
    problem = "'typeName' init param is not allowed in this processor";
  } else if ( ! params.typeClass.isEmpty()) {
    problem = "'typeClass' init param is not allowed in this processor";
  } else if (null != params.fieldNameMatchesSchemaField) {
    problem = "'fieldNameMatchesSchemaField' init param is not allowed in this processor";
  }
  if (null != problem) {
    throw new SolrException(SERVER_ERROR, problem);
  }
}
/**
 * One configured "typeMapping": a target field type name together with the value
 * classes that map to it.
 */
private static class TypeMapping {
  // Name of the schema field type this mapping targets
  public String fieldTypeName;
  // Configured class names, as read from the "valueClass" init params
  public Collection<String> valueClassNames;
  // Loaded classes for valueClassNames; null until populateValueClasses() is called
  public Set<Class<?>> valueClasses;

  public TypeMapping(String fieldTypeName, Collection<String> valueClassNames) {
    this.fieldTypeName = fieldTypeName;
    this.valueClassNames = valueClassNames;
    // this.valueClasses population is delayed until the schema is available
  }

  /**
   * Checks that the target field type exists in the core's latest schema and loads
   * each configured value class with the core's resource class loader.
   *
   * @param core the core supplying the schema and the class loader
   * @throws SolrException (SERVER_ERROR) if the field type is not in the schema or a
   *         value class cannot be loaded
   */
  public void populateValueClasses(SolrCore core) {
    IndexSchema schema = core.getLatestSchema();
    ClassLoader loader = core.getResourceLoader().getClassLoader();
    if (null == schema.getFieldTypeByName(fieldTypeName)) {
      throw new SolrException(SERVER_ERROR, "fieldType '" + fieldTypeName + "' not found in the schema");
    }
    valueClasses = new HashSet<Class<?>>();
    for (String valueClassName : valueClassNames) {
      try {
        valueClasses.add(loader.loadClass(valueClassName));
      } catch (ClassNotFoundException e) {
        // Keep the original exception as the cause so the class loading failure stays diagnosable
        throw new SolrException(SERVER_ERROR,
            "valueClass '" + valueClassName + "' not found for fieldType '" + fieldTypeName + "'", e);
      }
    }
  }
}
/**
 * Update processor that, for each added document, adds to the schema every field
 * selected by the factory's selector, then passes the command on to the next processor.
 */
private class AddSchemaFieldsUpdateProcessor extends UpdateRequestProcessor {
  public AddSchemaFieldsUpdateProcessor(UpdateRequestProcessor next) {
    super(next);
  }

  /**
   * Adds the document's selected fields to the schema, retrying whenever the
   * addition fails because a field already exists, and then delegates to the
   * next processor.
   *
   * @throws SolrException (BAD_REQUEST) if the core's latest schema is not mutable
   */
  @Override
  public void processAdd(AddUpdateCommand cmd) throws IOException {
    final SolrCore core = cmd.getReq().getCore();
    if ( ! core.getLatestSchema().isMutable()) {
      final String message = "This IndexSchema is not mutable.";
      throw new SolrException(BAD_REQUEST, message);
    }
    final SolrInputDocument doc = cmd.getSolrInputDocument();
    for (;;) {
      // NOTE(review): nothing visible here guards the window between reading oldSchema and
      // setLatestSchema(); a schema installed by another request in between looks like it
      // could be overwritten - confirm.
      final IndexSchema oldSchema = core.getLatestSchema();
      List<SchemaField> newFields = new ArrayList<SchemaField>();
      for (final String fieldName : doc.getFieldNames()) {
        // getSelector() reports a descriptive error, instead of an NPE, if inform(SolrCore) was never called
        if (getSelector().shouldMutate(fieldName)) {
          String fieldTypeName = mapValueClassesToFieldType(doc.getField(fieldName));
          newFields.add(oldSchema.newField(fieldName, fieldTypeName, Collections.<String,Object>emptyMap()));
        }
      }
      if (newFields.isEmpty()) {
        // nothing to do - no fields will be added - exit from the retry loop
        log.debug("No fields to add to the schema.");
        break;
      }
      if (log.isDebugEnabled()) {
        StringBuilder builder = new StringBuilder();
        builder.append("Fields to be added to the schema: [");
        boolean isFirst = true;
        for (SchemaField field : newFields) {
          builder.append(isFirst ? "" : ",");
          isFirst = false;
          builder.append(field.getName());
          builder.append("{type=").append(field.getType().getTypeName()).append("}");
        }
        builder.append("]");
        log.debug(builder.toString());
      }
      try {
        IndexSchema newSchema = oldSchema.addFields(newFields);
        core.setLatestSchema(newSchema);
        cmd.getReq().updateSchemaToLatest();
        log.debug("Successfully added field(s) to the schema.");
        break; // success - exit from the retry loop
      } catch(ManagedIndexSchema.FieldExistsException e) {
        log.debug("At least one field to be added already exists in the schema - retrying.");
        // No action: at least one field to be added already exists in the schema, so retry
      }
    }
    super.processAdd(cmd);
  }

  /**
   * Returns the field type name of the first type mapping for which every value of
   * the field is an instance of one of the mapping's value classes, or the default
   * field type if no mapping qualifies.
   */
  private String mapValueClassesToFieldType(SolrInputField field) {
    // assumes field.getValues() is non-null - TODO confirm
    NEXT_TYPE_MAPPING: for (TypeMapping typeMapping : typeMappings) {
      NEXT_FIELD_VALUE: for (Object fieldValue : field.getValues()) {
        for (Class<?> valueClass : typeMapping.valueClasses) {
          if (valueClass.isInstance(fieldValue)) {
            continue NEXT_FIELD_VALUE;
          }
        }
        // This fieldValue is not an instance of any of this fieldType's valueClass-s
        continue NEXT_TYPE_MAPPING;
      }
      // Success! Each of this field's values is an instance of one of this fieldType's valueClass-s
      return typeMapping.fieldTypeName;
    }
    // At least one of this field's values is not an instance of any configured fieldType's valueClass-s
    return defaultFieldType;
  }
}
}

View File

@ -165,7 +165,32 @@ public abstract class FieldMutatingUpdateProcessorFactory
return params; return params;
} }
/**
 * Parses every "exclude" init param into a {@link SelectorParams}.
 * <p>
 * Each "exclude" must be a non-null &lt;lst/&gt; whose sub-params are all consumed by
 * {@link #parseSelectorParams}. One "exclude" entry is removed from
 * <code>args</code> for each entry processed.
 * </p>
 *
 * @param args the processor's init params
 * @return the parsed exclusions, possibly empty
 * @throws SolrException (SERVER_ERROR) if an "exclude" is null, is not a &lt;lst/&gt;,
 *         or contains an unexpected sub-param
 */
public static Collection<SelectorParams> parseSelectorExclusionParams(NamedList args) {
  Collection<SelectorParams> exclusions = new ArrayList<SelectorParams>();
  List<Object> excList = args.getAll("exclude");
  for (Object excObj : excList) {
    if (null == excObj) {
      throw new SolrException
          (SERVER_ERROR, "'exclude' init param can not be null");
    }
    if (! (excObj instanceof NamedList) ) {
      throw new SolrException
          (SERVER_ERROR, "'exclude' init param must be <lst/>");
    }
    NamedList exc = (NamedList) excObj;
    exclusions.add(parseSelectorParams(exc));
    if (0 < exc.size()) {
      // Report the leftover sub-param of this <lst/>, not the first entry of the outer args
      throw new SolrException(SERVER_ERROR,
          "Unexpected 'exclude' init sub-param(s): '" +
              exc.getName(0) + "'");
    }
    // call once per instance
    args.remove("exclude");
  }
  return exclusions;
}
/** /**
* Handles common initialization related to source fields for * Handles common initialization related to source fields for
@ -179,27 +204,8 @@ public abstract class FieldMutatingUpdateProcessorFactory
public void init(NamedList args) { public void init(NamedList args) {
inclusions = parseSelectorParams(args); inclusions = parseSelectorParams(args);
exclusions = parseSelectorExclusionParams(args);
List<Object> excList = args.getAll("exclude");
for (Object excObj : excList) {
if (null == excObj) {
throw new SolrException
(SERVER_ERROR, "'exclude' init param can not be null");
}
if (! (excObj instanceof NamedList) ) {
throw new SolrException
(SERVER_ERROR, "'exclude' init param must be <lst/>");
}
NamedList exc = (NamedList) excObj;
exclusions.add(parseSelectorParams(exc));
if (0 < exc.size()) {
throw new SolrException(SERVER_ERROR,
"Unexpected 'exclude' init sub-param(s): '" +
args.getName(0) + "'");
}
// call once per instance
args.remove("exclude");
}
if (0 < args.size()) { if (0 < args.size()) {
throw new SolrException(SERVER_ERROR, throw new SolrException(SERVER_ERROR,
"Unexpected init param(s): '" + "Unexpected init param(s): '" +

View File

@ -0,0 +1,49 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="add-schema-fields-update-processor" version="1.5">
<types>
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" multiValued="true" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" multiValued="true" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" multiValued="true" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" multiValued="true" positionIncrementGap="0"/>
<fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" multiValued="true" positionIncrementGap="0"/>
<fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
<fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="text" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
</types>
<fields>
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
<field name="_version_" type="long" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
<dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
<dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
<dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
<dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
</fields>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -0,0 +1,155 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Test config that enumerates several update processor chain configurations
that use AddSchemaFieldsUpdateProcessorFactory.
-->
<config>
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
<schemaFactory class="ManagedIndexSchemaFactory">
<bool name="mutable">true</bool>
<str name="managedSchemaResourceName">managed-schema</str>
</schemaFactory>
<updateRequestProcessorChain name="add-fields-no-run-processor">
<processor class="solr.AddSchemaFieldsUpdateProcessorFactory">
<str name="defaultFieldType">text</str>
<lst name="typeMapping">
<str name="valueClass">java.lang.Boolean</str>
<str name="fieldType">boolean</str>
</lst>
<lst name="typeMapping">
<str name="valueClass">java.lang.Integer</str>
<str name="fieldType">tint</str>
</lst>
<lst name="typeMapping">
<str name="valueClass">java.lang.Float</str>
<str name="fieldType">tfloat</str>
</lst>
<lst name="typeMapping">
<str name="valueClass">java.util.Date</str>
<str name="fieldType">tdate</str>
</lst>
<lst name="typeMapping">
<str name="valueClass">java.lang.Long</str>
<str name="valueClass">java.lang.Integer</str>
<str name="fieldType">tlong</str>
</lst>
<lst name="typeMapping">
<arr name="valueClass">
<str>java.lang.Double</str>
<str>java.lang.Float</str>
</arr>
<str name="fieldType">tdouble</str>
</lst>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="add-fields">
<processor class="solr.AddSchemaFieldsUpdateProcessorFactory">
<str name="defaultFieldType">text</str>
<lst name="typeMapping">
<str name="valueClass">java.lang.Boolean</str>
<str name="fieldType">boolean</str>
</lst>
<lst name="typeMapping">
<str name="valueClass">java.lang.Integer</str>
<str name="fieldType">tint</str>
</lst>
<lst name="typeMapping">
<str name="valueClass">java.lang.Float</str>
<str name="fieldType">tfloat</str>
</lst>
<lst name="typeMapping">
<str name="valueClass">java.util.Date</str>
<str name="fieldType">tdate</str>
</lst>
<lst name="typeMapping">
<str name="valueClass">java.lang.Long</str>
<str name="valueClass">java.lang.Integer</str>
<str name="fieldType">tlong</str>
</lst>
<lst name="typeMapping">
<str name="valueClass">java.lang.Number</str>
<str name="fieldType">tdouble</str>
</lst>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="parse-and-add-fields">
<processor class="solr.ParseBooleanFieldUpdateProcessorFactory"/>
<processor class="solr.ParseLongFieldUpdateProcessorFactory"/>
<processor class="solr.ParseDoubleFieldUpdateProcessorFactory"/>
<processor class="solr.ParseDateFieldUpdateProcessorFactory">
<arr name="format">
<str>yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
<str>yyyy-MM-dd'T'HH:mm:ss,SSSZ</str>
<str>yyyy-MM-dd'T'HH:mm:ss.SSS</str>
<str>yyyy-MM-dd'T'HH:mm:ss,SSS</str>
<str>yyyy-MM-dd'T'HH:mm:ssZ</str>
<str>yyyy-MM-dd'T'HH:mm:ss</str>
<str>yyyy-MM-dd'T'HH:mmZ</str>
<str>yyyy-MM-dd'T'HH:mm</str>
<str>yyyy-MM-dd HH:mm:ss.SSSZ</str>
<str>yyyy-MM-dd HH:mm:ss,SSSZ</str>
<str>yyyy-MM-dd HH:mm:ss.SSS</str>
<str>yyyy-MM-dd HH:mm:ss,SSS</str>
<str>yyyy-MM-dd HH:mm:ssZ</str>
<str>yyyy-MM-dd HH:mm:ss</str>
<str>yyyy-MM-dd HH:mmZ</str>
<str>yyyy-MM-dd HH:mm</str>
<str>yyyy-MM-dd</str>
</arr>
</processor>
<processor class="solr.AddSchemaFieldsUpdateProcessorFactory">
<str name="defaultFieldType">text</str>
<lst name="typeMapping">
<str name="valueClass">java.lang.Boolean</str>
<str name="fieldType">boolean</str>
</lst>
<lst name="typeMapping">
<str name="valueClass">java.lang.Integer</str>
<str name="fieldType">tint</str>
</lst>
<lst name="typeMapping">
<str name="valueClass">java.lang.Float</str>
<str name="fieldType">tfloat</str>
</lst>
<lst name="typeMapping">
<str name="valueClass">java.util.Date</str>
<str name="fieldType">tdate</str>
</lst>
<lst name="typeMapping">
<str name="valueClass">java.lang.Long</str>
<str name="valueClass">java.lang.Integer</str>
<str name="fieldType">tlong</str>
</lst>
<lst name="typeMapping">
<str name="valueClass">java.lang.Number</str>
<str name="fieldType">tdouble</str>
</lst>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
</config>

View File

@ -0,0 +1,223 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.commons.io.FileUtils;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.TestManagedSchema;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;
import org.junit.After;
import org.junit.Before;
import java.io.File;
import java.util.Date;
/**
* Tests for {@link AddSchemaFieldsUpdateProcessorFactory}: fields that are unknown
* to the schema are added to it, with a field type chosen from the classes of their values.
*/
public class AddSchemaFieldsUpdateProcessorFactoryTest extends UpdateProcessorTestBase {
private static final String SOLRCONFIG_XML = "solrconfig-add-schema-fields-update-processor-chains.xml";
private static final String SCHEMA_XML = "schema-add-schema-fields-update-processor.xml";
private static File tmpSolrHome;
private static File tmpConfDir;
private static final String collection = "collection1";
private static final String confDir = collection + "/conf";
/**
 * Copies the test solrconfig and schema into a fresh temporary Solr home and
 * initializes a core from it before each test.
 */
@Before
private void initManagedSchemaCore() throws Exception {
  createTempDir();
  // Name the temp Solr home after this test class, so the directory is attributable to it
  final String tmpSolrHomePath = TEMP_DIR + File.separator
      + AddSchemaFieldsUpdateProcessorFactoryTest.class.getSimpleName() + System.currentTimeMillis();
  tmpSolrHome = new File(tmpSolrHomePath).getAbsoluteFile();
  tmpConfDir = new File(tmpSolrHome, confDir);
  File testHomeConfDir = new File(TEST_HOME(), confDir);
  FileUtils.copyFileToDirectory(new File(testHomeConfDir, SOLRCONFIG_XML), tmpConfDir);
  FileUtils.copyFileToDirectory(new File(testHomeConfDir, SCHEMA_XML), tmpConfDir);
  // initCore will trigger an upgrade to managed schema, since the solrconfig*.xml has
  // <schemaFactory class="ManagedIndexSchemaFactory" ... />
  initCore(SOLRCONFIG_XML, SCHEMA_XML, tmpSolrHome.getPath());
}
/**
 * Deletes the core and then removes the temporary Solr home directory
 * created for the test.
 */
@After
private void deleteCoreAndTempSolrHomeDirectory() throws Exception {
deleteCore();
FileUtils.deleteDirectory(tmpSolrHome);
}
/**
 * Sends a document with one unknown, Date-valued field through the
 * "add-fields-no-run-processor" chain and checks that the field is added to
 * the schema with field type "tdate".
 */
public void testSingleField() throws Exception {
IndexSchema schema = h.getCore().getLatestSchema();
final String fieldName = "newfield1";
// Precondition: the field is not in the schema yet
assertNull(schema.getFieldOrNull(fieldName));
String dateString = "2010-11-12T13:14:15.168Z";
DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateTime();
Date date = dateTimeFormatter.parseDateTime(dateString).toDate();
SolrInputDocument d = processAdd("add-fields-no-run-processor", doc(f("id", "1"), f(fieldName, date)));
assertNotNull(d);
// Re-read the schema from the core to observe the added field
schema = h.getCore().getLatestSchema();
assertNotNull(schema.getFieldOrNull(fieldName));
assertEquals("tdate", schema.getFieldType(fieldName).getTypeName());
}
/**
 * Sends a document with one unknown, Float-valued field through the "add-fields"
 * chain, checks that the field is added with field type "tfloat", then commits
 * and queries the document to verify the stored value.
 */
public void testSingleFieldRoundTrip() throws Exception {
IndexSchema schema = h.getCore().getLatestSchema();
final String fieldName = "newfield2";
// Precondition: the field is not in the schema yet
assertNull(schema.getFieldOrNull(fieldName));
Float floatValue = -13258.992f;
SolrInputDocument d = processAdd("add-fields", doc(f("id", "2"), f(fieldName, floatValue)));
assertNotNull(d);
// Re-read the schema from the core to observe the added field
schema = h.getCore().getLatestSchema();
assertNotNull(schema.getFieldOrNull(fieldName));
assertEquals("tfloat", schema.getFieldType(fieldName).getTypeName());
assertU(commit());
assertQ(req("id:2"), "//arr[@name='" + fieldName + "']/float[.='" + floatValue.toString() + "']");
}
/**
 * Sends a document whose unknown field holds both a Float and a Double value
 * through the "add-fields" chain, checks that the field is added with field type
 * "tdouble", then commits and queries the document to verify both values.
 */
public void testSingleFieldMixedFieldTypesRoundTrip() throws Exception {
IndexSchema schema = h.getCore().getLatestSchema();
final String fieldName = "newfield3";
// Precondition: the field is not in the schema yet
assertNull(schema.getFieldOrNull(fieldName));
Float fieldValue1 = -13258.0f;
Double fieldValue2 = 8.4828800808E10;
SolrInputDocument d = processAdd
("add-fields", doc(f("id", "3"), f(fieldName, fieldValue1, fieldValue2)));
assertNotNull(d);
// Re-read the schema from the core to observe the added field
schema = h.getCore().getLatestSchema();
assertNotNull(schema.getFieldOrNull(fieldName));
assertEquals("tdouble", schema.getFieldType(fieldName).getTypeName());
assertU(commit());
assertQ(req("id:3")
,"//arr[@name='" + fieldName + "']/double[.='" + fieldValue1.toString() + "']"
,"//arr[@name='" + fieldName + "']/double[.='" + fieldValue2.toString() + "']");
}
/**
 * Adds a document whose single previously-unknown field holds a
 * {@link Float}, a {@link Double} and a {@link String} value via
 * {@code processAdd("add-fields", ...)}. Expects the field to be added with
 * the type named "text" and all three values to be returned as strings after
 * a commit.
 */
public void testSingleFieldDefaultFieldTypeRoundTrip() throws Exception {
  final String fieldName = "newfield4";

  // Precondition: the field must not exist in the schema yet.
  assertNull(h.getCore().getLatestSchema().getFieldOrNull(fieldName));

  final Float floatValue = -13258.0f;
  final Double doubleValue = 8.4828800808E10;
  final String textValue = "blah blah";
  final SolrInputDocument processed = processAdd(
      "add-fields", doc(f("id", "4"), f(fieldName, floatValue, doubleValue, textValue)));
  assertNotNull(processed);

  final IndexSchema updatedSchema = h.getCore().getLatestSchema();
  assertNotNull(updatedSchema.getFieldOrNull(fieldName));
  assertEquals("text", updatedSchema.getFieldType(fieldName).getTypeName());

  assertU(commit());

  // Every value is expected as a <str> element inside the field's <arr>.
  final String strPrefix = "//arr[@name='" + fieldName + "']/str[.='";
  assertQ(req("id:4"),
      strPrefix + floatValue + "']",
      strPrefix + doubleValue + "']",
      strPrefix + textValue + "']");
}
/**
 * Adds a document with two previously-unknown fields via
 * {@code processAdd("add-fields", ...)}: the first holds a {@link Float}, a
 * {@link Double} and a {@link Long}; the second holds an {@link Integer} and
 * a {@link Long}. Expects the first field to get the type named "tdouble" and
 * the second the type named "tlong", and all values to be readable after a
 * commit.
 */
public void testMultipleFieldsRoundTrip() throws Exception {
  final String fieldName1 = "newfield5";
  final String fieldName2 = "newfield6";

  // Precondition: neither field may exist in the schema yet.
  final IndexSchema initialSchema = h.getCore().getLatestSchema();
  assertNull(initialSchema.getFieldOrNull(fieldName1));
  assertNull(initialSchema.getFieldOrNull(fieldName2));

  final Float firstFloat = -13258.0f;
  final Double firstDouble = 8.4828800808E10;
  final Long firstLong = 999L;
  final Integer secondInt = 55123;
  final Long secondLong = 1234567890123456789L;

  final SolrInputDocument processed = processAdd(
      "add-fields",
      doc(f("id", "5"),
          f(fieldName1, firstFloat, firstDouble, firstLong),
          f(fieldName2, secondInt, secondLong)));
  assertNotNull(processed);

  final IndexSchema updatedSchema = h.getCore().getLatestSchema();
  assertNotNull(updatedSchema.getFieldOrNull(fieldName1));
  assertNotNull(updatedSchema.getFieldOrNull(fieldName2));
  assertEquals("tdouble", updatedSchema.getFieldType(fieldName1).getTypeName());
  assertEquals("tlong", updatedSchema.getFieldType(fieldName2).getTypeName());

  assertU(commit());

  final String doublePrefix = "//arr[@name='" + fieldName1 + "']/double[.='";
  final String longPrefix = "//arr[@name='" + fieldName2 + "']/long[.='";
  assertQ(req("id:5"),
      doublePrefix + firstFloat + "']",
      doublePrefix + firstDouble + "']",
      // The Long is rendered through doubleValue() so the expectation reads "999.0".
      doublePrefix + firstLong.doubleValue() + "']",
      longPrefix + secondInt + "']",
      longPrefix + secondLong + "']");
}
/**
 * Adds a document with four previously-unknown fields, all supplied as
 * strings, via {@code processAdd("parse-and-add-fields", ...)}.
 *
 * <p>Expected outcome asserted by this test:
 * <ul>
 *   <li>field 1 (three numeric strings, two with a fraction) gets the type named "tdouble";</li>
 *   <li>field 2 (two integral strings) gets the type named "tlong";</li>
 *   <li>field 3 (a non-numeric string plus a numeric string) gets the type named "text"
 *       and its values come back unchanged as strings;</li>
 *   <li>field 4 (a "yyyy-MM-dd HH:mm" string) gets the type named "tdate" and comes back
 *       formatted as "yyyy-MM-dd'T'HH:mm:ss" followed by "Z".</li>
 * </ul>
 */
public void testParseAndAddMultipleFieldsRoundTrip() throws Exception {
  final String fieldName1 = "newfield7";
  final String fieldName2 = "newfield8";
  final String fieldName3 = "newfield9";
  final String fieldName4 = "newfield10";

  // Precondition: none of the fields may exist in the schema yet.
  IndexSchema schema = h.getCore().getLatestSchema();
  assertNull(schema.getFieldOrNull(fieldName1));
  assertNull(schema.getFieldOrNull(fieldName2));
  assertNull(schema.getFieldOrNull(fieldName3));
  assertNull(schema.getFieldOrNull(fieldName4));

  // Field 1: string inputs and the numeric values they are expected to be read back as.
  String field1String1 = "-13,258.0";
  Float field1Value1 = -13258.0f;
  String field1String2 = "84,828,800,808.0";
  Double field1Value2 = 8.4828800808E10;
  String field1String3 = "999";
  Long field1Value3 = 999L;

  // Field 2: string inputs and the integral values they are expected to be read back as.
  String field2String1 = "55,123";
  Integer field2Value1 = 55123;
  String field2String2 = "1,234,567,890,123,456,789";
  Long field2Value2 = 1234567890123456789L;

  // Field 3: expected to be read back verbatim, so no separate expected values are needed.
  String field3String1 = "blah-blah";
  String field3String2 = "-5.28E-3";

  // Field 4: the input is parsed in UTC and re-printed with seconds plus a trailing "Z"
  // to build the expected query result.
  String field4String1 = "1999-04-17 17:42";
  DateTimeFormatter inputDateFormatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm").withZoneUTC();
  DateTime dateTime = inputDateFormatter.parseDateTime(field4String1);
  DateTimeFormatter expectedDateFormatter = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss").withZoneUTC();
  String field4Value1String = expectedDateFormatter.print(dateTime) + "Z";

  SolrInputDocument d = processAdd
      ("parse-and-add-fields", doc(f("id", "6"), f(fieldName1, field1String1, field1String2, field1String3),
                                   f(fieldName2, field2String1, field2String2),
                                   f(fieldName3, field3String1, field3String2),
                                   f(fieldName4, field4String1)));
  assertNotNull(d);

  // Fetch the schema again: the assertions below are made against the latest snapshot.
  schema = h.getCore().getLatestSchema();
  assertNotNull(schema.getFieldOrNull(fieldName1));
  assertNotNull(schema.getFieldOrNull(fieldName2));
  assertNotNull(schema.getFieldOrNull(fieldName3));
  assertNotNull(schema.getFieldOrNull(fieldName4));
  assertEquals("tdouble", schema.getFieldType(fieldName1).getTypeName());
  assertEquals("tlong", schema.getFieldType(fieldName2).getTypeName());
  assertEquals("text", schema.getFieldType(fieldName3).getTypeName());
  assertEquals("tdate", schema.getFieldType(fieldName4).getTypeName());

  assertU(commit());
  assertQ(req("id:6")
      ,"//arr[@name='" + fieldName1 + "']/double[.='" + field1Value1.toString() + "']"
      ,"//arr[@name='" + fieldName1 + "']/double[.='" + field1Value2.toString() + "']"
      ,"//arr[@name='" + fieldName1 + "']/double[.='" + field1Value3.doubleValue() + "']"
      ,"//arr[@name='" + fieldName2 + "']/long[.='" + field2Value1.toString() + "']"
      ,"//arr[@name='" + fieldName2 + "']/long[.='" + field2Value2.toString() + "']"
      ,"//arr[@name='" + fieldName3 + "']/str[.='" + field3String1 + "']"
      ,"//arr[@name='" + fieldName3 + "']/str[.='" + field3String2 + "']"
      ,"//arr[@name='" + fieldName4 + "']/date[.='" + field4Value1String + "']");
}
}