SOLR-4892: Add field update processors to parse/convert String-typed fields to Date, Number, and Boolean

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1497165 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Steven Rowe 2013-06-27 00:44:27 +00:00
parent 4ac141d204
commit d6d65851cf
38 changed files with 2552 additions and 16 deletions

View File

@ -182,6 +182,11 @@
<artifactId>commons-io</artifactId> <artifactId>commons-io</artifactId>
<version>${commons-io.version}</version> <version>${commons-io.version}</version>
</dependency> </dependency>
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>2.2</version>
</dependency>
<dependency> <dependency>
<groupId>org.apache.httpcomponents</groupId> <groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId> <artifactId>httpclient</artifactId>

View File

@ -136,6 +136,10 @@
<groupId>commons-fileupload</groupId> <groupId>commons-fileupload</groupId>
<artifactId>commons-fileupload</artifactId> <artifactId>commons-fileupload</artifactId>
</dependency> </dependency>
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
</dependency>
<dependency> <dependency>
<groupId>org.restlet.jee</groupId> <groupId>org.restlet.jee</groupId>
<artifactId>org.restlet</artifactId> <artifactId>org.restlet</artifactId>

View File

@ -120,6 +120,18 @@ New Features
* SOLR-4916: Add support to write and read Solr index files and transaction log * SOLR-4916: Add support to write and read Solr index files and transaction log
files to and from HDFS. (phunt, Mark Miller, Greg Chanan) files to and from HDFS. (phunt, Mark Miller, Greg Chanan)
* SOLR-4892: Add FieldMutatingUpdateProcessorFactory subclasses
Parse{Date,Integer,Long,Float,Double,Boolean}UpdateProcessorFactory. These
factories have a default selector that matches all fields that either don't
match any schema field, or are in the schema with the corresponding
typeClass. If they see a value that is not a CharSequence, or can't parse
the value, they leave it as is. For multi-valued fields, these processors
will not convert any values unless all are first successfully parsed, or
already are instances of the target class. Ordering the processors, e.g.
[Boolean, Long, Double, Date] will allow e.g. values ["2", "5", "8.6"] to
be left alone by the Boolean and Long processors, but then converted by the
Double processor. (Steve Rowe, hossman)
Bug Fixes Bug Fixes
---------------------- ----------------------

View File

@ -35,6 +35,7 @@
<dependency org="javax.servlet" name="javax.servlet-api" rev="3.0.1" transitive="false"/> <dependency org="javax.servlet" name="javax.servlet-api" rev="3.0.1" transitive="false"/>
<dependency org="org.restlet.jee" name="org.restlet" rev="2.1.1" transitive="false"/> <dependency org="org.restlet.jee" name="org.restlet" rev="2.1.1" transitive="false"/>
<dependency org="org.restlet.jee" name="org.restlet.ext.servlet" rev="2.1.1" transitive="false"/> <dependency org="org.restlet.jee" name="org.restlet.ext.servlet" rev="2.1.1" transitive="false"/>
<dependency org="joda-time" name="joda-time" rev="2.2" transitive="false"/>
<dependency org="org.apache.hadoop" name="hadoop-common" rev="&hadoop.version;" transitive="false"/> <dependency org="org.apache.hadoop" name="hadoop-common" rev="&hadoop.version;" transitive="false"/>
<dependency org="org.apache.hadoop" name="hadoop-hdfs" rev="&hadoop.version;" transitive="false"/> <dependency org="org.apache.hadoop" name="hadoop-hdfs" rev="&hadoop.version;" transitive="false"/>

View File

@ -111,7 +111,7 @@ import java.util.*;
* @see <a href="http://www.w3.org/TR/xmlschema-2/#dateTime">XML schema part 2</a> * @see <a href="http://www.w3.org/TR/xmlschema-2/#dateTime">XML schema part 2</a>
* @deprecated {@link TrieDateField} is recomended for all new schemas * @deprecated {@link TrieDateField} is recomended for all new schemas
*/ */
public class DateField extends PrimitiveFieldType { public class DateField extends PrimitiveFieldType implements DateValueFieldType {
public static TimeZone UTC = TimeZone.getTimeZone("UTC"); public static TimeZone UTC = TimeZone.getTimeZone("UTC");

View File

@ -0,0 +1,24 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
/**
 * Marker interface for Date-valued field types.
 * <p>
 * The interface declares no methods; it exists only so that code can test a
 * {@link FieldType} with <code>instanceof</code>. {@link DateField} and
 * {@link TrieDateField} implement it, and the default field selector of
 * <code>ParseDateFieldUpdateProcessorFactory</code> uses it to decide whether
 * a schema field is a date field.
 * </p>
 */
public interface DateValueFieldType {
}

View File

@ -45,7 +45,7 @@ import org.apache.solr.search.QParser;
* *
* @see TrieDoubleField * @see TrieDoubleField
*/ */
public class DoubleField extends PrimitiveFieldType { public class DoubleField extends PrimitiveFieldType implements DoubleValueFieldType {
private static final FieldCache.DoubleParser PARSER = new FieldCache.DoubleParser() { private static final FieldCache.DoubleParser PARSER = new FieldCache.DoubleParser() {

View File

@ -0,0 +1,24 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
/**
 * Marker interface for double-valued field types.
 * <p>
 * Declares no methods of its own and extends {@link NumericValueFieldType},
 * so every double-valued type is also recognizable as numeric. Implemented by
 * {@link DoubleField}, {@link SortableDoubleField} and {@link TrieDoubleField}.
 * </p>
 */
public interface DoubleValueFieldType extends NumericValueFieldType {
}

View File

@ -46,7 +46,7 @@ import java.io.IOException;
* *
* @see TrieFloatField * @see TrieFloatField
*/ */
public class FloatField extends PrimitiveFieldType { public class FloatField extends PrimitiveFieldType implements FloatValueFieldType {
private static final FieldCache.FloatParser PARSER = new FieldCache.FloatParser() { private static final FieldCache.FloatParser PARSER = new FieldCache.FloatParser() {

View File

@ -0,0 +1,24 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
/**
 * Marker interface for float-valued field types.
 * <p>
 * Declares no methods of its own and extends {@link NumericValueFieldType},
 * so every float-valued type is also recognizable as numeric. Implemented by
 * {@link FloatField}, {@link SortableFloatField} and {@link TrieFloatField}.
 * </p>
 */
public interface FloatValueFieldType extends NumericValueFieldType {
}

View File

@ -46,7 +46,7 @@ import java.io.IOException;
* *
* @see TrieIntField * @see TrieIntField
*/ */
public class IntField extends PrimitiveFieldType { public class IntField extends PrimitiveFieldType implements IntValueFieldType {
private static final FieldCache.IntParser PARSER = new FieldCache.IntParser() { private static final FieldCache.IntParser PARSER = new FieldCache.IntParser() {

View File

@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
/**
 * Marker interface for int-valued field types.
 * <p>
 * Declares no methods of its own and extends {@link NumericValueFieldType},
 * so every int-valued type is also recognizable as numeric. Implemented by
 * {@link IntField}, {@link SortableIntField} and {@link TrieIntField}.
 * </p>
 */
public interface IntValueFieldType extends NumericValueFieldType {
}

View File

@ -46,7 +46,7 @@ import java.util.Map;
* *
* @see TrieLongField * @see TrieLongField
*/ */
public class LongField extends PrimitiveFieldType { public class LongField extends PrimitiveFieldType implements LongValueFieldType {
private static final FieldCache.LongParser PARSER = new FieldCache.LongParser() { private static final FieldCache.LongParser PARSER = new FieldCache.LongParser() {

View File

@ -0,0 +1,24 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
/**
 * Marker interface for long-valued field types.
 * <p>
 * Declares no methods of its own and extends {@link NumericValueFieldType},
 * so every long-valued type is also recognizable as numeric. Implemented by
 * {@link LongField} and {@link TrieLongField}.
 * </p>
 */
public interface LongValueFieldType extends NumericValueFieldType {
}

View File

@ -0,0 +1,24 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
/**
 * Marker interface for numeric-valued field types.
 * <p>
 * Declares no methods. It is the common parent of the more specific marker
 * interfaces {@link DoubleValueFieldType}, {@link FloatValueFieldType},
 * {@link IntValueFieldType} and {@link LongValueFieldType}.
 * </p>
 */
public interface NumericValueFieldType {
}

View File

@ -54,7 +54,7 @@ import java.io.IOException;
* @deprecated use {@link DoubleField} or {@link TrieDoubleField} - will be removed in 5.x * @deprecated use {@link DoubleField} or {@link TrieDoubleField} - will be removed in 5.x
*/ */
@Deprecated @Deprecated
public class SortableDoubleField extends PrimitiveFieldType { public class SortableDoubleField extends PrimitiveFieldType implements DoubleValueFieldType {
@Override @Override
public SortField getSortField(SchemaField field,boolean reverse) { public SortField getSortField(SchemaField field,boolean reverse) {
return getStringSort(field,reverse); return getStringSort(field,reverse);

View File

@ -55,7 +55,7 @@ import java.io.IOException;
* @deprecated use {@link FloatField} or {@link TrieFloatField} - will be removed in 5.x * @deprecated use {@link FloatField} or {@link TrieFloatField} - will be removed in 5.x
*/ */
@Deprecated @Deprecated
public class SortableFloatField extends PrimitiveFieldType { public class SortableFloatField extends PrimitiveFieldType implements FloatValueFieldType {
@Override @Override
public SortField getSortField(SchemaField field,boolean reverse) { public SortField getSortField(SchemaField field,boolean reverse) {
return getStringSort(field,reverse); return getStringSort(field,reverse);

View File

@ -55,7 +55,7 @@ import java.io.IOException;
* @deprecated use {@link IntField} or {@link TrieIntField} - will be removed in 5.x * @deprecated use {@link IntField} or {@link TrieIntField} - will be removed in 5.x
*/ */
@Deprecated @Deprecated
public class SortableIntField extends PrimitiveFieldType { public class SortableIntField extends PrimitiveFieldType implements IntValueFieldType {
@Override @Override
public SortField getSortField(SchemaField field,boolean reverse) { public SortField getSortField(SchemaField field,boolean reverse) {
return getStringSort(field,reverse); return getStringSort(field,reverse);

View File

@ -55,7 +55,7 @@ import java.io.IOException;
* @see DateField * @see DateField
* @see TrieField * @see TrieField
*/ */
public class TrieDateField extends DateField { public class TrieDateField extends DateField implements DateValueFieldType {
final TrieField wrappedField = new TrieField() {{ final TrieField wrappedField = new TrieField() {{
type = TrieTypes.DATE; type = TrieTypes.DATE;

View File

@ -33,7 +33,7 @@ package org.apache.solr.schema;
* @see Double * @see Double
* @see <a href="http://java.sun.com/docs/books/jls/third_edition/html/typesValues.html#4.2.3">Java Language Specification, s4.2.3</a> * @see <a href="http://java.sun.com/docs/books/jls/third_edition/html/typesValues.html#4.2.3">Java Language Specification, s4.2.3</a>
*/ */
public class TrieDoubleField extends TrieField { public class TrieDoubleField extends TrieField implements DoubleValueFieldType {
{ {
type=TrieTypes.DOUBLE; type=TrieTypes.DOUBLE;
} }

View File

@ -33,7 +33,7 @@ package org.apache.solr.schema;
* @see Float * @see Float
* @see <a href="http://java.sun.com/docs/books/jls/third_edition/html/typesValues.html#4.2.3">Java Language Specification, s4.2.3</a> * @see <a href="http://java.sun.com/docs/books/jls/third_edition/html/typesValues.html#4.2.3">Java Language Specification, s4.2.3</a>
*/ */
public class TrieFloatField extends TrieField { public class TrieFloatField extends TrieField implements FloatValueFieldType {
{ {
type=TrieTypes.FLOAT; type=TrieTypes.FLOAT;
} }

View File

@ -27,7 +27,7 @@ package org.apache.solr.schema;
* *
* @see Integer * @see Integer
*/ */
public class TrieIntField extends TrieField { public class TrieIntField extends TrieField implements IntValueFieldType {
{ {
type=TrieTypes.INTEGER; type=TrieTypes.INTEGER;
} }

View File

@ -27,7 +27,7 @@ package org.apache.solr.schema;
* *
* @see Long * @see Long
*/ */
public class TrieLongField extends TrieField { public class TrieLongField extends TrieField implements LongValueFieldType {
{ {
type=TrieTypes.LONG; type=TrieTypes.LONG;
} }

View File

@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.common.SolrInputField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
/**
 * Abstract subclass of FieldMutatingUpdateProcessor for implementing
 * UpdateProcessors that will mutate all individual values of a selected
 * field independently.  If not all individual values are acceptable
 * - i.e., {@link #mutateValue} returns {@link #SKIP_FIELD_VALUE_LIST_SINGLETON}
 * for at least one value - then none of the values are mutated:
 * mutate(srcField) will return srcField.
 *
 * @see FieldMutatingUpdateProcessorFactory
 * @see FieldValueMutatingUpdateProcessor
 */
public abstract class AllValuesOrNoneFieldMutatingUpdateProcessor extends FieldMutatingUpdateProcessor {

  private static final Logger log = LoggerFactory.getLogger(AllValuesOrNoneFieldMutatingUpdateProcessor.class);

  /** Sentinel returned by {@link #mutateValue} to request removal of a single value. */
  public static final Object DELETE_VALUE_SINGLETON = new Object() {
    @Override
    public String toString() {
      return "!!Singleton Object Triggering Value Deletion!!";
    }
  };

  /**
   * Sentinel returned by {@link #mutateValue} to signal that a value cannot be
   * mutated, which causes the whole field to be left untouched.
   */
  public static final Object SKIP_FIELD_VALUE_LIST_SINGLETON= new Object() {
    @Override
    public String toString() {
      return "!!Singleton Object Triggering Skipping Field Mutation!!";
    }
  };

  public AllValuesOrNoneFieldMutatingUpdateProcessor(FieldNameSelector selector, UpdateRequestProcessor next) {
    super(selector, next);
  }

  /**
   * Mutates individual values of a field as needed, or returns the original
   * value.
   *
   * @param srcVal a value from a matched field which should be mutated
   * @return the value to use as a replacement for src, or
   *         <code>DELETE_VALUE_SINGLETON</code> to indicate that the value
   *         should be removed completely, or
   *         <code>SKIP_FIELD_VALUE_LIST_SINGLETON</code> to indicate that
   *         the value cannot be mutated and therefore none of the field's
   *         values should be mutated
   * @see #DELETE_VALUE_SINGLETON
   * @see #SKIP_FIELD_VALUE_LIST_SINGLETON
   */
  protected abstract Object mutateValue(final Object srcVal);

  /**
   * Applies {@link #mutateValue} to every value of <code>srcField</code>.
   *
   * @param srcField the field whose values should be mutated
   * @return <code>srcField</code> itself when it has no value collection or
   *         when any value yields <code>SKIP_FIELD_VALUE_LIST_SINGLETON</code>;
   *         <code>null</code> when no value remains after mutation;
   *         otherwise a new field with the same name and boost holding the
   *         mutated values
   */
  protected final SolrInputField mutate(final SolrInputField srcField) {
    // NOTE(review): SolrInputField.getValues() appears to return null for a
    // field holding no value; iterating over that would throw an NPE, so such
    // a field is passed through untouched.
    if (null == srcField.getValues()) {
      return srcField;
    }

    // Debug messages are buffered and only emitted when the whole field is
    // actually mutated, i.e. when no value triggered a skip.
    List<String> messages = null;
    SolrInputField result = new SolrInputField(srcField.getName());
    for (final Object srcVal : srcField.getValues()) {
      final Object destVal = mutateValue(srcVal);
      if (SKIP_FIELD_VALUE_LIST_SINGLETON == destVal) {
        // Guarded so the (null-safe) argument array is only built when needed.
        if (log.isDebugEnabled()) {
          log.debug("field '{}' {} value '{}' is not mutatable, so no values will be mutated",
                    new Object[] { srcField.getName(), typeName(srcVal), srcVal });
        }
        return srcField;
      }
      if (DELETE_VALUE_SINGLETON == destVal) {
        if (log.isDebugEnabled()) {
          if (null == messages) {
            messages = new ArrayList<String>();
          }
          messages.add(String.format(Locale.ROOT, "removing value from field '%s': %s '%s'",
                                     srcField.getName(), typeName(srcVal), srcVal));
        }
      } else {
        if (log.isDebugEnabled()) {
          if (null == messages) {
            messages = new ArrayList<String>();
          }
          messages.add(String.format(Locale.ROOT, "replace value from field '%s': %s '%s' with %s '%s'",
                                     srcField.getName(), typeName(srcVal), srcVal,
                                     typeName(destVal), destVal));
        }
        result.addValue(destVal, 1.0F);
      }
    }
    result.setBoost(srcField.getBoost());

    if (null != messages && log.isDebugEnabled()) {
      for (String message : messages) {
        log.debug(message);
      }
    }
    return 0 == result.getValueCount() ? null : result;
  }

  /** Returns the simple class name of <code>value</code>, or "null" for a null value. */
  private static String typeName(final Object value) {
    return null == value ? "null" : value.getClass().getSimpleName();
  }
}

View File

@ -108,7 +108,7 @@ public abstract class FieldMutatingUpdateProcessor
// for now, don't allow it. // for now, don't allow it.
if (! fname.equals(dest.getName()) ) { if (! fname.equals(dest.getName()) ) {
throw new SolrException(SERVER_ERROR, throw new SolrException(SERVER_ERROR,
"mutute returned field with different name: " "mutate returned field with different name: "
+ fname + " => " + dest.getName()); + fname + " => " + dest.getName());
} }
doc.put(dest.getName(), dest); doc.put(dest.getName(), dest);
@ -118,7 +118,7 @@ public abstract class FieldMutatingUpdateProcessor
} }
/** /**
* Interface for idenfifying which fileds should be mutated * Interface for identifying which fields should be mutated
*/ */
public static interface FieldNameSelector { public static interface FieldNameSelector {
public boolean shouldMutate(final String fieldName); public boolean shouldMutate(final String fieldName);

View File

@ -65,7 +65,6 @@ import org.apache.solr.util.plugin.SolrCoreAware;
* </p> * </p>
* <ul> * <ul>
* <li><code>fieldNameMatchesSchemaField</code> - selecting specific fields based on whether or not they match a schema field</li> * <li><code>fieldNameMatchesSchemaField</code> - selecting specific fields based on whether or not they match a schema field</li>
</li>
* </ul> * </ul>
* <p> * <p>
* One or more <code>excludes</code> &lt;lst&gt; params may also be specified, * One or more <code>excludes</code> &lt;lst&gt; params may also be specified,

View File

@ -0,0 +1,157 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
/**
 * <p>
 * Attempts to mutate selected fields that have only CharSequence-typed values
 * into Boolean values.
 * </p>
 * <p>
 * The default selection behavior is to mutate both those fields that don't match
 * a schema field, as well as those fields that do match a schema field and have
 * a field type that uses class solr.BoolField.
 * </p>
 * <p>
 * If all values are parseable as boolean (or are already Boolean), then the field
 * will be mutated, replacing each value with its parsed Boolean equivalent;
 * otherwise, no mutation will occur.
 * </p>
 * <p>
 * The default true and false values are "true" and "false", respectively, and match
 * case-insensitively.  The following configuration changes the acceptable values, and
 * requires a case-sensitive match - note that either individual &lt;str&gt; elements
 * or &lt;arr&gt;-s of &lt;str&gt; elements may be used to specify the trueValue-s
 * and falseValue-s:
 * </p>
 *
 * <pre class="prettyprint">
 * &lt;processor class="solr.ParseBooleanFieldUpdateProcessorFactory"&gt;
 *   &lt;str name="caseSensitive"&gt;true&lt;/str&gt;
 *   &lt;str name="trueValue"&gt;True&lt;/str&gt;
 *   &lt;str name="trueValue"&gt;Yes&lt;/str&gt;
 *   &lt;arr name="falseValue"&gt;
 *     &lt;str&gt;False&lt;/str&gt;
 *     &lt;str&gt;No&lt;/str&gt;
 *   &lt;/arr&gt;
 * &lt;/processor&gt;</pre>
 */
public class ParseBooleanFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {
  private static final String TRUE_VALUES_PARAM = "trueValue";
  private static final String FALSE_VALUES_PARAM = "falseValue";
  private static final String CASE_SENSITIVE_PARAM = "caseSensitive";

  // Both sets hold values already normalized by normalize(String).
  private final Set<String> trueValues = new HashSet<String>(Arrays.asList("true"));
  private final Set<String> falseValues = new HashSet<String>(Arrays.asList("false"));
  private boolean caseSensitive = false;

  /**
   * Creates a processor that converts each CharSequence value matching a
   * configured true/false value into a Boolean, passes existing Boolean values
   * through, and skips the whole field for anything else.
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
                                            SolrQueryResponse rsp,
                                            UpdateRequestProcessor next) {
    return new AllValuesOrNoneFieldMutatingUpdateProcessor(getSelector(), next) {
      @Override
      protected Object mutateValue(Object srcVal) {
        if (srcVal instanceof CharSequence) {
          String stringVal = normalize(srcVal.toString());
          if (trueValues.contains(stringVal)) {
            return true;
          } else if (falseValues.contains(stringVal)) {
            return false;
          } else {
            return SKIP_FIELD_VALUE_LIST_SINGLETON;
          }
        }
        if (srcVal instanceof Boolean) {
          return srcVal;
        }
        return SKIP_FIELD_VALUE_LIST_SINGLETON;
      }
    };
  }

  /**
   * Reads the optional <code>caseSensitive</code>, <code>trueValue</code> and
   * <code>falseValue</code> params, removing them from <code>args</code>
   * before delegating to the superclass.
   *
   * @throws SolrException if a value ends up being both a true value and a
   *         false value
   */
  @Override
  public void init(NamedList args) {
    // caseSensitive must be resolved first: it controls how the values below are stored.
    Object caseSensitiveParam = args.remove(CASE_SENSITIVE_PARAM);
    if (null != caseSensitiveParam) {
      if (caseSensitiveParam instanceof Boolean) {
        caseSensitive = (Boolean)caseSensitiveParam;
      } else {
        caseSensitive = Boolean.valueOf(caseSensitiveParam.toString());
      }
    }

    Collection<String> trueValuesParam = oneOrMany(args, TRUE_VALUES_PARAM);
    if ( ! trueValuesParam.isEmpty()) {
      trueValues.clear();
      for (String trueVal : trueValuesParam) {
        trueValues.add(normalize(trueVal));
      }
    }

    Collection<String> falseValuesParam = oneOrMany(args, FALSE_VALUES_PARAM);
    if ( ! falseValuesParam.isEmpty()) {
      falseValues.clear();
      for (String val : falseValuesParam) {
        final String falseVal = normalize(val);
        if (trueValues.contains(falseVal)) {
          throw new SolrException(ErrorCode.SERVER_ERROR,
              "Param '" + FALSE_VALUES_PARAM + "' contains a value also in param '" + TRUE_VALUES_PARAM
                  + "': '" + val + "'");
        }
        falseValues.add(falseVal);
      }
    } else {
      // No falseValue configured, so the default false value stays in effect;
      // a configured trueValue must not collide with it either, otherwise the
      // value would silently be treated as true.
      for (String falseVal : falseValues) {
        if (trueValues.contains(falseVal)) {
          throw new SolrException(ErrorCode.SERVER_ERROR,
              "Param '" + TRUE_VALUES_PARAM + "' contains a value that is also a default false value: '"
                  + falseVal + "'");
        }
      }
    }
    super.init(args);
  }

  /**
   * Returns true if the field doesn't match any schema field or dynamic field,
   * or if the matched field's type is BoolField
   */
  @Override
  public FieldMutatingUpdateProcessor.FieldNameSelector
  getDefaultSelector(final SolrCore core) {

    return new FieldMutatingUpdateProcessor.FieldNameSelector() {
      @Override
      public boolean shouldMutate(final String fieldName) {
        final IndexSchema schema = core.getLatestSchema();
        FieldType type = schema.getFieldTypeNoEx(fieldName);
        return (null == type) || (type instanceof BoolField);
      }
    };
  }

  /** Lower-cases <code>value</code> with the root locale unless matching is case-sensitive. */
  private String normalize(final String value) {
    return caseSensitive ? value : value.toLowerCase(Locale.ROOT);
  }
}

View File

@ -0,0 +1,179 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.commons.lang.LocaleUtils;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.DateValueFieldType;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
/**
 * <p>
 * Attempts to mutate selected fields that have only CharSequence-typed values
 * into Date values.  Solr will continue to index date/times in the UTC time
 * zone, but the input date/times may be expressed using other time zones,
 * and will be converted to UTC when they are mutated.
 * </p>
 * <p>
 * The default selection behavior is to mutate both those fields that don't match
 * a schema field, as well as those fields that match a schema field with a field
 * type that uses class solr.DateField or a sub-class, including solr.TrieDateField.
 * </p>
 * <p>
 * If all values are parseable as dates (or are already Date), then the field will
 * be mutated, replacing each value with its parsed Date equivalent; otherwise, no
 * mutation will occur.
 * </p>
 * <p>
 * One or more date "format" specifiers must be specified.  See
 * <a href="http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html"
 * >Joda-time's DateTimeFormat javadocs</a> for a description of format strings.
 * The formats are tried in the order in which they are configured, and the
 * first one that parses a value wins.
 * </p>
 * <p>
 * A default time zone name or offset may optionally be specified for those dates
 * that don't include an explicit zone/offset.  NOTE: three-letter zone
 * designations like "EST" are not parseable (with the single exception of "UTC"),
 * because they are ambiguous.  If no default time zone is specified, UTC will be
 * used. See <a href="http://en.wikipedia.org/wiki/List_of_tz_database_time_zones"
 * >Wikipedia's list of TZ database time zone names</a>.
 * </p>
 * <p>
 * The locale to use when parsing field values using the specified formats may
 * optionally be specified.  If no locale is configured, then {@link Locale#ROOT}
 * will be used. The following configuration specifies the French/France locale and
 * two date formats that will parse the strings "le mardi 8 janvier 2013" and
 * "le 28 déc. 2010 à 15 h 30", respectively.  Note that either individual &lt;str&gt;
 * elements or &lt;arr&gt;-s of &lt;str&gt; elements may be used to specify the
 * date format(s):
 * </p>
 *
 * <pre class="prettyprint">
 * &lt;processor class="solr.ParseDateFieldUpdateProcessorFactory"&gt;
 *   &lt;str name="defaultTimeZone"&gt;Europe/Paris&lt;/str&gt;
 *   &lt;str name="locale"&gt;fr_FR&lt;/str&gt;
 *   &lt;arr name="format"&gt;
 *     &lt;str&gt;'le' EEEE dd MMMM yyyy&lt;/str&gt;
 *     &lt;str&gt;'le' dd MMM. yyyy 'à' HH 'h' mm&lt;/str&gt;
 *   &lt;/arr&gt;
 * &lt;/processor&gt;</pre>
 *
 * <p>
 * See {@link Locale} for a description of acceptable language, country (optional)
 * and variant (optional) values, joined with underscore(s).
 * </p>
 */
public class ParseDateFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {
  public static final Logger log = LoggerFactory.getLogger(ParseDateFieldUpdateProcessorFactory.class);

  private static final String FORMATS_PARAM = "format";
  private static final String DEFAULT_TIME_ZONE_PARAM = "defaultTimeZone";
  private static final String LOCALE_PARAM = "locale";

  // Pattern string -> formatter; a LinkedHashMap so formats are tried in configuration order.
  private final Map<String,DateTimeFormatter> formats = new LinkedHashMap<String,DateTimeFormatter>();

  /**
   * Creates a processor that converts each CharSequence value into a UTC
   * {@link Date} using the first configured format that parses it, passes
   * existing Date values through, and skips the whole field for anything else.
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
                                            SolrQueryResponse rsp,
                                            UpdateRequestProcessor next) {
    return new AllValuesOrNoneFieldMutatingUpdateProcessor(getSelector(), next) {
      @Override
      protected Object mutateValue(Object srcVal) {
        if (srcVal instanceof CharSequence) {
          String srcStringVal = srcVal.toString();
          for (Map.Entry<String,DateTimeFormatter> format : formats.entrySet()) {
            DateTimeFormatter parser = format.getValue();
            try {
              DateTime dateTime = parser.parseDateTime(srcStringVal);
              return dateTime.withZone(DateTimeZone.UTC).toDate();
            } catch (IllegalArgumentException e) {
              // Not an error: this format simply doesn't match, try the next one.
              log.debug("value '{}' is not parseable with format '{}'",
                        new Object[] { srcStringVal, format.getKey() });
            }
          }
          log.debug("value '{}' was not parsed by any configured format, thus was not mutated", srcStringVal);
          return SKIP_FIELD_VALUE_LIST_SINGLETON;
        }
        if (srcVal instanceof Date) {
          return srcVal;
        }
        return SKIP_FIELD_VALUE_LIST_SINGLETON;
      }
    };
  }

  /**
   * Reads the optional <code>locale</code> and <code>defaultTimeZone</code>
   * params and the <code>format</code> param(s), removing them from
   * <code>args</code> before delegating to the superclass.  Each format is
   * compiled once here, with the configured zone and locale applied.
   */
  @Override
  public void init(NamedList args) {

    Locale locale = Locale.ROOT;

    // Read via toString(), like defaultTimeZone below, rather than a hard cast
    // that would fail with a ClassCastException on a non-String param value.
    Object localeParam = args.remove(LOCALE_PARAM);
    if (null != localeParam) {
      locale = LocaleUtils.toLocale(localeParam.toString());
    }

    Object defaultTimeZoneParam = args.remove(DEFAULT_TIME_ZONE_PARAM);
    DateTimeZone defaultTimeZone = DateTimeZone.UTC;
    if (null != defaultTimeZoneParam) {
      defaultTimeZone = DateTimeZone.forID(defaultTimeZoneParam.toString());
    }

    Collection<String> formatsParam = oneOrMany(args, FORMATS_PARAM);
    if (null != formatsParam) {
      for (String value : formatsParam) {
        formats.put(value, DateTimeFormat.forPattern(value).withZone(defaultTimeZone).withLocale(locale));
      }
    }
    super.init(args);
  }

  /**
   * Returns true if the field doesn't match any schema field or dynamic field,
   * or if the matched field's type implements {@link DateValueFieldType}
   */
  @Override
  public FieldMutatingUpdateProcessor.FieldNameSelector
  getDefaultSelector(final SolrCore core) {

    return new FieldMutatingUpdateProcessor.FieldNameSelector() {
      @Override
      public boolean shouldMutate(final String fieldName) {
        final IndexSchema schema = core.getLatestSchema();
        FieldType type = schema.getFieldTypeNoEx(fieldName);
        return (null == type) || type instanceof DateValueFieldType;
      }
    };
  }
}

View File

@ -0,0 +1,122 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.DoubleValueFieldType;
import org.apache.solr.schema.FieldType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.math.RoundingMode;
import java.text.NumberFormat;
import java.text.ParsePosition;
import java.util.Locale;
/**
* <p>
* Attempts to mutate selected fields that have only CharSequence-typed values
* into Double values. If required, rounding uses ceiling mode:
* {@link RoundingMode#CEILING}. Grouping separators (',' in the ROOT locale)
* are parsed.
* </p>
* <p>
* The default selection behavior is to mutate both those fields that don't match
* a schema field, as well as those fields that match a schema field with a field
* type that uses class solr.DoubleField, solr.TrieDoubleField, or
* solr.SortableDoubleField.
* </p>
* <p>
* If all values are parseable as double (or are already Double), then the field
* will be mutated, replacing each value with its parsed Double equivalent;
* otherwise, no mutation will occur.
* </p>
* <p>
* The locale to use when parsing field values, which will affect the recognized
* grouping separator and decimal characters, may optionally be specified. If
* no locale is configured, then {@link Locale#ROOT} will be used. The following
* configuration specifies the Russian/Russia locale, which will parse the
* string "12 345,899" as double value 12345.899 (the grouping separator
* character is U+00A0 NO-BREAK SPACE).
* </p>
*
* <pre class="prettyprint">
* &lt;processor class="solr.ParseDoubleFieldUpdateProcessorFactory"&gt;
* &lt;str name="locale"&gt;ru_RU&lt;/str&gt;
* &lt;/processor&gt;</pre>
*
* <p>
* See {@link Locale} for a description of acceptable language, country (optional)
* and variant (optional) values, joined with underscore(s).
* </p>
*/
public class ParseDoubleFieldUpdateProcessorFactory extends ParseNumericFieldUpdateProcessorFactory {
  private static final Logger log = LoggerFactory.getLogger(ParseDoubleFieldUpdateProcessorFactory.class);

  /**
   * Creates a processor that parses selected field values into Doubles using
   * this factory's configured locale.
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
                                            SolrQueryResponse rsp,
                                            UpdateRequestProcessor next) {
    return new ParseDoubleFieldUpdateProcessor(getSelector(), locale, next);
  }

  /** Replaces CharSequence values that fully parse as numbers with their Double value. */
  private static final class ParseDoubleFieldUpdateProcessor extends AllValuesOrNoneFieldMutatingUpdateProcessor {
    private final Locale locale;

    // NumberFormat instances are not thread safe
    private final ThreadLocal<NumberFormat> numberFormat = new ThreadLocal<NumberFormat>() {
      @Override
      protected NumberFormat initialValue() {
        NumberFormat format = NumberFormat.getInstance(locale);
        format.setParseIntegerOnly(false);
        format.setRoundingMode(RoundingMode.CEILING);
        return format;
      }
    };

    ParseDoubleFieldUpdateProcessor(FieldNameSelector selector, Locale locale, UpdateRequestProcessor next) {
      super(selector, next);
      this.locale = locale;
    }

    /**
     * Returns the parsed Double for a CharSequence that is consumed entirely by
     * the number format, the value itself if it is already a Double, and
     * SKIP_FIELD_VALUE_LIST_SINGLETON otherwise.
     */
    @Override
    protected Object mutateValue(Object srcVal) {
      if (srcVal instanceof CharSequence) {
        String stringVal = srcVal.toString();
        ParsePosition pos = new ParsePosition(0);
        Number number = numberFormat.get().parse(stringVal, pos);
        // parse() returns null without advancing the position on failure, so an
        // empty string would otherwise pass the length check and cause an NPE below
        if (null == number || pos.getIndex() != stringVal.length()) {
          log.debug("value '{}' is not parseable, thus not mutated; unparsed chars: '{}'",
                    new Object[] { srcVal, stringVal.substring(pos.getIndex())});
          return SKIP_FIELD_VALUE_LIST_SINGLETON;
        }
        return number.doubleValue();
      }
      if (srcVal instanceof Double) {
        return srcVal;
      }
      return SKIP_FIELD_VALUE_LIST_SINGLETON;
    }
  }

  /** Only schema field types that are {@link DoubleValueFieldType}s are compatible. */
  @Override
  protected boolean isSchemaFieldTypeCompatible(FieldType type) {
    return type instanceof DoubleValueFieldType;
  }
}

View File

@ -0,0 +1,123 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.FloatValueFieldType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.math.RoundingMode;
import java.text.NumberFormat;
import java.text.ParsePosition;
import java.util.Locale;
/**
* <p>
* Attempts to mutate selected fields that have only CharSequence-typed values
* into Float values. If required, rounding uses ceiling mode:
* {@link RoundingMode#CEILING}. Grouping separators (',' in the ROOT locale)
* are parsed.
* </p>
* <p>
* The default selection behavior is to mutate both those fields that don't match
* a schema field, as well as those fields that match a schema field with a field
* type that uses class solr.FloatField, solr.TrieFloatField, or
* solr.SortableFloatField.
* </p>
* <p>
* If all values are parseable as float (or are already Float), then the field
* will be mutated, replacing each value with its parsed Float equivalent;
* otherwise, no mutation will occur.
* </p>
* <p>
* The locale to use when parsing field values, which will affect the recognized
* grouping separator and decimal characters, may optionally be specified. If
* no locale is configured, then {@link Locale#ROOT} will be used. The following
* configuration specifies the Russian/Russia locale, which will parse the string
* "12 345,899" as 12345.899f (the grouping separator character is U+00A0 NO-BREAK
* SPACE).
* </p>
*
* <pre class="prettyprint">
* &lt;processor class="solr.ParseFloatFieldUpdateProcessorFactory"&gt;
* &lt;str name="locale"&gt;ru_RU&lt;/str&gt;
* &lt;/processor&gt;</pre>
*
* <p>
* See {@link Locale} for a description of acceptable language, country (optional)
* and variant (optional) values, joined with underscore(s).
* </p>
*/
public class ParseFloatFieldUpdateProcessorFactory extends ParseNumericFieldUpdateProcessorFactory {
  private static final Logger log = LoggerFactory.getLogger(ParseFloatFieldUpdateProcessorFactory.class);

  /**
   * Creates a processor that parses selected field values into Floats using
   * this factory's configured locale.
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
                                            SolrQueryResponse rsp,
                                            UpdateRequestProcessor next) {
    return new ParseFloatFieldUpdateProcessor(getSelector(), locale, next);
  }

  /** Replaces CharSequence values that fully parse as numbers with their Float value. */
  private static class ParseFloatFieldUpdateProcessor extends AllValuesOrNoneFieldMutatingUpdateProcessor {
    private final Locale locale;

    // NumberFormat instances are not thread safe
    private final ThreadLocal<NumberFormat> numberFormat = new ThreadLocal<NumberFormat>() {
      @Override
      protected NumberFormat initialValue() {
        NumberFormat format = NumberFormat.getInstance(locale);
        format.setParseIntegerOnly(false);
        format.setRoundingMode(RoundingMode.CEILING);
        return format;
      }
    };

    ParseFloatFieldUpdateProcessor(FieldNameSelector selector, Locale locale, UpdateRequestProcessor next) {
      super(selector, next);
      this.locale = locale;
    }

    /**
     * Returns the parsed Float for a CharSequence that is consumed entirely by
     * the number format, the value itself if it is already a Float, and
     * SKIP_FIELD_VALUE_LIST_SINGLETON otherwise.
     */
    @Override
    protected Object mutateValue(Object srcVal) {
      if (srcVal instanceof CharSequence) {
        String stringVal = srcVal.toString();
        ParsePosition pos = new ParsePosition(0);
        Number number = numberFormat.get().parse(stringVal, pos);
        // parse() returns null without advancing the position on failure, so an
        // empty string would otherwise pass the length check and cause an NPE below
        if (null == number || pos.getIndex() != stringVal.length()) {
          log.debug("value '{}' is not parseable, thus not mutated; unparsed chars: '{}'",
                    new Object[] { srcVal, stringVal.substring(pos.getIndex())});
          return SKIP_FIELD_VALUE_LIST_SINGLETON;
        }
        return number.floatValue();
      }
      if (srcVal instanceof Float) {
        return srcVal;
      }
      return SKIP_FIELD_VALUE_LIST_SINGLETON;
    }
  }

  /** Only schema field types that are {@link FloatValueFieldType}s are compatible. */
  @Override
  protected boolean isSchemaFieldTypeCompatible(FieldType type) {
    return type instanceof FloatValueFieldType;
  }
}

View File

@ -0,0 +1,124 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IntValueFieldType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.text.NumberFormat;
import java.text.ParsePosition;
import java.util.Locale;
/**
* <p>
* Attempts to mutate selected fields that have only CharSequence-typed values
* into Integer values. Grouping separators (',' in the ROOT locale) are parsed.
* </p>
* <p>
* The default selection behavior is to mutate both those fields that don't match
* a schema field, as well as those fields that match a schema field with a field
* type that uses class solr.IntField, solr.TrieIntField, or
* solr.SortableIntField.
* </p>
* <p>
* If all values are parseable as int (or are already Integer), then the field
* will be mutated, replacing each value with its parsed Integer equivalent;
* otherwise, no mutation will occur.
* </p>
* <p>
* The locale to use when parsing field values, which will affect the recognized
* grouping separator character, may optionally be specified. If no locale is
* configured, then {@link Locale#ROOT} will be used. The following configuration
* specifies the Russian/Russia locale, which will parse the string "12 345 899"
* as 12345899 (the grouping separator character is U+00A0 NO-BREAK SPACE).
* </p>
*
* <pre class="prettyprint">
* &lt;processor class="solr.ParseIntFieldUpdateProcessorFactory"&gt;
* &lt;str name="locale"&gt;ru_RU&lt;/str&gt;
* &lt;/processor&gt;</pre>
*
* <p>
* See {@link Locale} for a description of acceptable language, country (optional)
* and variant (optional) values, joined with underscore(s).
* </p>
*/
public class ParseIntFieldUpdateProcessorFactory extends ParseNumericFieldUpdateProcessorFactory {
  private static final Logger log = LoggerFactory.getLogger(ParseIntFieldUpdateProcessorFactory.class);

  /**
   * Creates a processor that parses selected field values into Integers using
   * this factory's configured locale.
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
                                            SolrQueryResponse rsp,
                                            UpdateRequestProcessor next) {
    return new ParseIntFieldUpdateProcessor(getSelector(), locale, next);
  }

  /** Replaces CharSequence values that fully parse as int-sized integers with their Integer value. */
  private static final class ParseIntFieldUpdateProcessor extends AllValuesOrNoneFieldMutatingUpdateProcessor {
    private final Locale locale;

    // NumberFormat instances are not thread safe
    private final ThreadLocal<NumberFormat> numberFormat = new ThreadLocal<NumberFormat>() {
      @Override
      protected NumberFormat initialValue() {
        NumberFormat format = NumberFormat.getInstance(locale);
        format.setParseIntegerOnly(true);
        return format;
      }
    };

    ParseIntFieldUpdateProcessor(FieldNameSelector selector, Locale locale, UpdateRequestProcessor next) {
      super(selector, next);
      this.locale = locale;
    }

    /**
     * Returns the parsed Integer for a CharSequence that is consumed entirely by
     * the number format and whose value fits into an int, the value itself if it
     * is already an Integer, and SKIP_FIELD_VALUE_LIST_SINGLETON otherwise.
     */
    @Override
    protected Object mutateValue(Object srcVal) {
      if (srcVal instanceof CharSequence) {
        String stringVal = srcVal.toString();
        ParsePosition pos = new ParsePosition(0);
        Number number = numberFormat.get().parse(stringVal, pos);
        // parse() returns null without advancing the position on failure, so an
        // empty string would otherwise pass the length check and cause an NPE below
        if (null == number || pos.getIndex() != stringVal.length()) {
          log.debug("value '{}' is not parseable, thus not mutated; unparsed chars: '{}'",
                    new Object[] { srcVal, stringVal.substring(pos.getIndex())});
          return SKIP_FIELD_VALUE_LIST_SINGLETON;
        }
        // DecimalFormat yields a Long whenever the integer fits into one; a Double
        // other than (negative) zero therefore means NaN, infinity or a magnitude
        // beyond long range. NaN in particular would convert to 0 and slip through
        // the truncation check below.
        boolean notAnInteger = number instanceof Double && number.doubleValue() != 0.0d;
        int intValue = number.intValue();
        if (!notAnInteger && number.longValue() == (long)intValue) {
          // If the high bits don't get truncated by number.intValue()
          return intValue;
        }
        log.debug("value '{}' doesn't fit into an Integer, thus was not mutated", srcVal);
        return SKIP_FIELD_VALUE_LIST_SINGLETON;
      }
      if (srcVal instanceof Integer) {
        return srcVal;
      }
      return SKIP_FIELD_VALUE_LIST_SINGLETON;
    }
  }

  /** Only schema field types that are {@link IntValueFieldType}s are compatible. */
  @Override
  protected boolean isSchemaFieldTypeCompatible(FieldType type) {
    return type instanceof IntValueFieldType;
  }
}

View File

@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.LongValueFieldType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.text.NumberFormat;
import java.text.ParsePosition;
import java.util.Locale;
/**
* <p>
* Attempts to mutate selected fields that have only CharSequence-typed values
* into Long values. Grouping separators (',' in the ROOT locale) are parsed.
* </p>
* <p>
* The default selection behavior is to mutate both those fields that don't match
* a schema field, as well as those fields that match a schema field with a field
* type that uses class solr.LongField, solr.TrieLongField, or
* solr.SortableLongField.
* </p>
* <p>
* If all values are parseable as long (or are already Long), then the field
* will be mutated, replacing each value with its parsed Long equivalent;
* otherwise, no mutation will occur.
* </p>
* <p>
* The locale to use when parsing field values, which will affect the recognized
* grouping separator character, may optionally be specified. If no locale is
* configured, then {@link Locale#ROOT} will be used. The following configuration
* specifies the Russian/Russia locale, which will parse the string "12 345 899"
* as 12345899L (the grouping separator character is U+00A0 NO-BREAK SPACE).
* </p>
*
* <pre class="prettyprint">
* &lt;processor class="solr.ParseLongFieldUpdateProcessorFactory"&gt;
* &lt;str name="locale"&gt;ru_RU&lt;/str&gt;
* &lt;/processor&gt;</pre>
*
* <p>
* See {@link Locale} for a description of acceptable language, country (optional)
* and variant (optional) values, joined with underscore(s).
* </p>
*/
public class ParseLongFieldUpdateProcessorFactory extends ParseNumericFieldUpdateProcessorFactory {
  private static final Logger log = LoggerFactory.getLogger(ParseLongFieldUpdateProcessorFactory.class);

  /**
   * Creates a processor that parses selected field values into Longs using
   * this factory's configured locale.
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest req,
                                            SolrQueryResponse rsp,
                                            UpdateRequestProcessor next) {
    return new ParseLongFieldUpdateProcessor(getSelector(), locale, next);
  }

  /** Replaces CharSequence values that fully parse as long-sized integers with their Long value. */
  private static class ParseLongFieldUpdateProcessor extends AllValuesOrNoneFieldMutatingUpdateProcessor {
    private final Locale locale;

    // NumberFormat instances are not thread safe
    private final ThreadLocal<NumberFormat> numberFormat = new ThreadLocal<NumberFormat>() {
      @Override
      protected NumberFormat initialValue() {
        NumberFormat format = NumberFormat.getInstance(locale);
        format.setParseIntegerOnly(true);
        return format;
      }
    };

    ParseLongFieldUpdateProcessor(FieldNameSelector selector, Locale locale, UpdateRequestProcessor next) {
      super(selector, next);
      this.locale = locale;
    }

    /**
     * Returns the parsed Long for a CharSequence that is consumed entirely by
     * the number format and whose value fits into a long, the value itself if it
     * is already a Long, and SKIP_FIELD_VALUE_LIST_SINGLETON otherwise.
     */
    @Override
    protected Object mutateValue(Object srcVal) {
      if (srcVal instanceof CharSequence) {
        String stringVal = srcVal.toString();
        ParsePosition pos = new ParsePosition(0);
        Number number = numberFormat.get().parse(stringVal, pos);
        // parse() returns null without advancing the position on failure, so an
        // empty string would otherwise pass the length check and cause an NPE below
        if (null == number || pos.getIndex() != stringVal.length()) {
          log.debug("value '{}' is not parseable, thus not mutated; unparsed chars: '{}'",
                    new Object[] { srcVal, stringVal.substring(pos.getIndex())});
          return SKIP_FIELD_VALUE_LIST_SINGLETON;
        }
        // DecimalFormat yields a Long whenever the integer fits into one; a Double
        // other than (negative) zero therefore means NaN, infinity or a magnitude
        // beyond long range, which longValue() would silently turn into 0 or clamp
        // to Long.MIN_VALUE/Long.MAX_VALUE.
        if (number instanceof Double && number.doubleValue() != 0.0d) {
          log.debug("value '{}' doesn't fit into a Long, thus was not mutated", srcVal);
          return SKIP_FIELD_VALUE_LIST_SINGLETON;
        }
        return number.longValue();
      }
      if (srcVal instanceof Long) {
        return srcVal;
      }
      return SKIP_FIELD_VALUE_LIST_SINGLETON;
    }
  }

  /** Only schema field types that are {@link LongValueFieldType}s are compatible. */
  @Override
  protected boolean isSchemaFieldTypeCompatible(FieldType type) {
    return type instanceof LongValueFieldType;
  }
}

View File

@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.commons.lang.LocaleUtils;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import java.util.Locale;
/**
* Abstract base class for numeric parsing update processor factories.
* Subclasses can optionally configure a locale. If no locale is configured,
* then {@link Locale#ROOT} will be used. E.g. to configure the French/France
* locale:
*
* <pre class="prettyprint">
* &lt;processor class="solr.Parse[Type]FieldUpdateProcessorFactory"&gt;
* &lt;str name="locale"&gt;fr_FR&lt;/str&gt;
* [...]
* &lt;/processor&gt;</pre>
*
* <p>
* See {@link Locale} for a description of acceptable language, country (optional)
* and variant (optional) values, joined with underscore(s).
* </p>
*/
public abstract class ParseNumericFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory {

  private static final String LOCALE_PARAM = "locale";

  /** Locale used by subclasses when parsing; stays {@link Locale#ROOT} unless configured. */
  protected Locale locale = Locale.ROOT;

  /**
   * Returns true if the given FieldType is compatible with this parsing factory.
   */
  protected abstract boolean isSchemaFieldTypeCompatible(FieldType type);

  /**
   * Consumes the optional "locale" argument, then passes the remaining
   * arguments on to the superclass.
   */
  @Override
  public void init(NamedList args) {
    final String requestedLocale = (String)args.remove(LOCALE_PARAM);
    if (requestedLocale != null) {
      locale = LocaleUtils.toLocale(requestedLocale);
    }
    super.init(args);
  }

  /**
   * Builds the default selector: a field is mutated when the latest schema has
   * no field type for its name, or when the matched field's type is compatible
   * according to {@link #isSchemaFieldTypeCompatible(FieldType)}.
   *
   * @param core Where to get the current schema from
   */
  @Override
  public FieldMutatingUpdateProcessor.FieldNameSelector
  getDefaultSelector(final SolrCore core) {
    return new FieldMutatingUpdateProcessor.FieldNameSelector() {
      @Override
      public boolean shouldMutate(final String fieldName) {
        final FieldType matchedType = core.getLatestSchema().getFieldTypeNoEx(fieldName);
        if (null == matchedType) {
          return true;
        }
        return isSchemaFieldTypeCompatible(matchedType);
      }
    };
  }
}

View File

@ -0,0 +1,230 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Test Config that enumerates many different parsing update processor chain
configurations.
-->
<config>
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
<!--
Naming convention used below: chains whose name ends in "no-run-processor"
contain only the parsing processor(s) and omit solr.RunUpdateProcessorFactory;
the other chains end with solr.RunUpdateProcessorFactory.
-->
<!-- Date parsing with a single ISO-8601 style format and the default field selector -->
<updateRequestProcessorChain name="parse-date">
<processor class="solr.ParseDateFieldUpdateProcessorFactory">
<str name="format">yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="parse-date-no-run-processor">
<processor class="solr.ParseDateFieldUpdateProcessorFactory">
<str name="format">yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
</processor>
</updateRequestProcessorChain>
<!-- Date parsing with explicit selectors replacing the default one -->
<updateRequestProcessorChain name="parse-date-explicit-not-in-schema-selector-no-run-processor">
<processor class="solr.ParseDateFieldUpdateProcessorFactory">
<bool name="fieldNameMatchesSchemaField">false</bool>
<str name="format">yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="parse-date-explicit-typeclass-selector-no-run-processor">
<processor class="solr.ParseDateFieldUpdateProcessorFactory">
<str name="typeClass">solr.DateField</str>
<str name="typeClass">solr.TrieDateField</str>
<str name="format">yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
</processor>
</updateRequestProcessorChain>
<!-- Date parsing with a configured defaultTimeZone; some formats carry no zone field (Z) -->
<updateRequestProcessorChain name="parse-date-non-UTC-defaultTimeZone">
<processor class="solr.ParseDateFieldUpdateProcessorFactory">
<str name="defaultTimeZone">America/New_York</str>
<str name="locale">en_US</str>
<str name="format">yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
<str name="format">yyyy-MM-dd'T'HH:mm:ss.SSS</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="US-Pacific-parse-date-no-run-processor">
<processor class="solr.ParseDateFieldUpdateProcessorFactory">
<str name="defaultTimeZone">America/Los_Angeles</str>
<arr name="format">
<str>MM/dd/yyyy</str>
</arr>
</processor>
</updateRequestProcessorChain>
<!-- Date parsing with many alternative formats, listed in the order they are configured -->
<updateRequestProcessorChain name="parse-date-UTC-defaultTimeZone-no-run-processor">
<processor class="solr.ParseDateFieldUpdateProcessorFactory">
<str name="defaultTimeZone">UTC</str>
<str name="locale">en_US</str>
<arr name="format">
<str>yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
<str>yyyy-MM-dd'T'HH:mm:ss,SSSZ</str>
<str>yyyy-MM-dd'T'HH:mm:ss.SSS</str>
<str>yyyy-MM-dd'T'HH:mm:ss,SSS</str>
<str>yyyy-MM-dd'T'HH:mm:ssZ</str>
<str>yyyy-MM-dd'T'HH:mm:ss</str>
<str>yyyy-MM-dd'T'HH:mmZ</str>
<str>yyyy-MM-dd'T'HH:mm</str>
<str>yyyy-MM-dd HH:mm:ss.SSSZ</str>
<str>yyyy-MM-dd HH:mm:ss,SSSZ</str>
<str>yyyy-MM-dd HH:mm:ss.SSS</str>
<str>yyyy-MM-dd HH:mm:ss,SSS</str>
<str>yyyy-MM-dd HH:mm:ssZ</str>
<str>yyyy-MM-dd HH:mm:ss</str>
<str>yyyy-MM-dd HH:mmZ</str>
<str>yyyy-MM-dd HH:mm</str>
<str>yyyy-MM-dd hh:mm a</str>
<str>yyyy-MM-dd hh:mma</str>
<str>yyyy-MM-dd</str>
<str>EEE MMM dd HH:mm:ss Z yyyy</str>
<str>EEE MMM dd HH:mm:ss yyyy Z</str>
<str>EEE MMM dd HH:mm:ss yyyy</str>
<str>EEE, dd MMM yyyy HH:mm:ss Z</str>
<str>EEEE, dd-MMM-yy HH:mm:ss Z</str>
<str>EEEE, MMMM dd, yyyy</str>
<str>MMMM dd, yyyy</str>
<str>MMM. dd, yyyy</str>
</arr>
</processor>
</updateRequestProcessorChain>
<!-- Date parsing with a non-English locale (French day and month names) -->
<updateRequestProcessorChain name="parse-french-date-UTC-defaultTimeZone-no-run-processor">
<processor class="solr.ParseDateFieldUpdateProcessorFactory">
<str name="defaultTimeZone">UTC</str>
<str name="locale">fr</str>
<str name="format">'le' EEEE dd MMMM yyyy</str>
</processor>
</updateRequestProcessorChain>
<!-- Integer parsing: default locale, plus a Russian-locale variant -->
<updateRequestProcessorChain name="parse-int">
<processor class="solr.ParseIntFieldUpdateProcessorFactory"/>
<processor class="solr.RunUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="parse-int-no-run-processor">
<processor class="solr.ParseIntFieldUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="parse-int-russian-no-run-processor">
<processor class="solr.ParseIntFieldUpdateProcessorFactory">
<str name="locale">ru_RU</str>
</processor>
</updateRequestProcessorChain>
<!-- Long parsing: default locale, plus a Russian-locale variant -->
<updateRequestProcessorChain name="parse-long">
<processor class="solr.ParseLongFieldUpdateProcessorFactory"/>
<processor class="solr.RunUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="parse-long-no-run-processor">
<processor class="solr.ParseLongFieldUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="parse-long-russian-no-run-processor">
<processor class="solr.ParseLongFieldUpdateProcessorFactory">
<str name="locale">ru_RU</str>
</processor>
</updateRequestProcessorChain>
<!-- Float parsing: default locale, plus a French-locale variant -->
<updateRequestProcessorChain name="parse-float">
<processor class="solr.ParseFloatFieldUpdateProcessorFactory"/>
<processor class="solr.RunUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="parse-float-no-run-processor">
<processor class="solr.ParseFloatFieldUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="parse-float-french-no-run-processor">
<processor class="solr.ParseFloatFieldUpdateProcessorFactory">
<str name="locale">fr_FR</str>
</processor>
</updateRequestProcessorChain>
<!-- Double parsing: default locale, plus a French-locale variant -->
<updateRequestProcessorChain name="parse-double">
<processor class="solr.ParseDoubleFieldUpdateProcessorFactory"/>
<processor class="solr.RunUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="parse-double-no-run-processor">
<processor class="solr.ParseDoubleFieldUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="parse-double-french-no-run-processor">
<processor class="solr.ParseDoubleFieldUpdateProcessorFactory">
<str name="locale">fr_FR</str>
</processor>
</updateRequestProcessorChain>
<!-- Boolean parsing: default settings, then configured alternate true/false values -->
<updateRequestProcessorChain name="parse-boolean">
<processor class="solr.ParseBooleanFieldUpdateProcessorFactory"/>
<processor class="solr.RunUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="parse-boolean-no-run-processor">
<processor class="solr.ParseBooleanFieldUpdateProcessorFactory"/>
</updateRequestProcessorChain>
<!-- Multiple alternate values in mixed case, with caseSensitive set to false -->
<updateRequestProcessorChain name="parse-boolean-alternate-values-no-run-processor">
<processor class="solr.ParseBooleanFieldUpdateProcessorFactory">
<bool name="caseSensitive">false</bool>
<arr name="trueValue">
<str>true</str>
<str>YES</str>
<str>on</str>
</arr>
<arr name="falseValue">
<str>false</str>
<str>no</str>
<str>oFF</str>
</arr>
</processor>
</updateRequestProcessorChain>
<!-- A single alternate value each, given as str elements rather than arr lists -->
<updateRequestProcessorChain name="parse-boolean-alternate-single-values-no-run-processor">
<processor class="solr.ParseBooleanFieldUpdateProcessorFactory">
<str name="trueValue">yup</str>
<str name="falseValue">nope</str>
</processor>
</updateRequestProcessorChain>
<!-- All parsers chained in sequence: Boolean, Int, Long, Double, then Date -->
<updateRequestProcessorChain name="cascading-parsers-no-run-processor">
<processor class="solr.ParseBooleanFieldUpdateProcessorFactory"/>
<processor class="solr.ParseIntFieldUpdateProcessorFactory"/>
<processor class="solr.ParseLongFieldUpdateProcessorFactory"/>
<!-- Disabled Float because it will always claim floating point values and round -->
<!-- to fit values in available precision -->
<!-- <processor class="solr.ParseFloatFieldUpdateProcessorFactory"/> -->
<processor class="solr.ParseDoubleFieldUpdateProcessorFactory"/>
<processor class="solr.ParseDateFieldUpdateProcessorFactory">
<arr name="format">
<str>yyyy-MM-dd</str>
<str>yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
<str>yyyy-MM-dd'T'HH:mm</str>
</arr>
</processor>
</updateRequestProcessorChain>
</config>

View File

@ -0,0 +1,910 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.schema.IndexSchema;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;
import org.junit.BeforeClass;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
/**
* Tests for the field mutating update processors
* that parse Dates, Longs, Doubles, and Booleans.
*/
public class ParsingFieldUpdateProcessorsTest extends UpdateProcessorTestBase {
private static final double EPSILON = 1E-15;
/** Starts the test core with the parsing-chains solrconfig and the schema12 schema. */
@BeforeClass
public static void beforeClass() throws Exception {
  final String configFile = "solrconfig-parsing-update-processor-chains.xml";
  final String schemaFile = "schema12.xml";
  initCore(configFile, schemaFile);
}
/**
 * Sends an ISO-8601 date string in a "*_dt" field through the "parse-date" chain, checks that the
 * value became a {@link Date} for the same instant, then commits and verifies that the stored
 * document reports the original date string.
 */
public void testParseDateRoundTrip() throws Exception {
  final String fieldName = "date_dt";
  final String isoDate = "2010-11-12T13:14:15.168Z";

  // should match "*_dt" dynamic field
  assertNotNull(h.getCore().getLatestSchema().getFieldOrNull(fieldName));

  SolrInputDocument processed = processAdd("parse-date", doc(f("id", "9"), f(fieldName, isoDate)));
  assertNotNull(processed);

  final long expectedMillis = ISODateTimeFormat.dateTime().parseDateTime(isoDate).getMillis();
  Object parsed = processed.getFieldValue(fieldName);
  assertTrue(parsed instanceof Date);
  assertEquals(expectedMillis, ((Date) parsed).getTime());

  assertU(commit());
  assertQ(req("id:9"), "//date[@name='date_dt'][.='" + isoDate + "']");
}
/**
 * Same round trip as the plain date test, but against a "*_tdt" field: the "parse-date" chain must
 * turn the ISO-8601 string into a {@link Date} for the same instant, and the committed document
 * must report the original date string.
 */
public void testParseTrieDateRoundTrip() throws Exception {
  final String fieldName = "date_tdt";
  final String isoDate = "2010-11-12T13:14:15.168Z";

  // should match "*_tdt" dynamic field
  assertNotNull(h.getCore().getLatestSchema().getFieldOrNull(fieldName));

  SolrInputDocument processed = processAdd("parse-date", doc(f("id", "39"), f(fieldName, isoDate)));
  assertNotNull(processed);

  final long expectedMillis = ISODateTimeFormat.dateTime().parseDateTime(isoDate).getMillis();
  Object parsed = processed.getFieldValue(fieldName);
  assertTrue(parsed instanceof Date);
  assertEquals(expectedMillis, ((Date) parsed).getTime());

  assertU(commit());
  assertQ(req("id:39"), "//date[@name='date_tdt'][.='" + isoDate + "']");
}
/**
 * Exercises the "parse-date-no-run-processor" chain on a field that is absent from the schema:
 * a single parseable value must become a {@link Date}, while a multi-valued field containing an
 * unparseable value must be left entirely as Strings, whichever value comes first.
 */
public void testParseDateFieldNotInSchema() throws Exception {
  final String chain = "parse-date-no-run-processor";
  final String fieldName = "not_in_schema";
  final String isoDate = "2010-11-12T13:14:15.168Z";

  assertNull(h.getCore().getLatestSchema().getFieldOrNull(fieldName));
  final long expectedMillis = ISODateTimeFormat.dateTime().parseDateTime(isoDate).getMillis();

  // one parseable value: mutated to a Date
  SolrInputDocument processed = processAdd(chain, doc(f("id", "18"), f(fieldName, isoDate)));
  assertNotNull(processed);
  Object single = processed.getFieldValue(fieldName);
  assertTrue(single instanceof Date);
  assertEquals(expectedMillis, ((Date) single).getTime());

  // unparseable value first: nothing may be mutated, since not all values are parseable as dates
  processed = processAdd(chain, doc(f("id", "36"), f(fieldName, "not a date", isoDate)));
  assertNotNull(processed);
  for (Object value : processed.getFieldValues(fieldName)) {
    assertTrue(value instanceof String);
  }

  // valid date first this time: still nothing may be mutated
  processed = processAdd(chain, doc(f("id", "72"), f(fieldName, isoDate, "not a date")));
  assertNotNull(processed);
  for (Object value : processed.getFieldValues(fieldName)) {
    assertTrue(value instanceof String);
  }
}
/**
 * Runs the "parse-date-non-UTC-defaultTimeZone" chain on one date string carrying an explicit "Z"
 * zone and one carrying no zone. Both must become {@link Date}s; after commit the zoned value must
 * read back unchanged, while the zone-less value must read back as 18:14:15.168Z, i.e. interpreted
 * as America/New_York local time and printed as UTC.
 */
public void testParseDateNonUTCdefaultTimeZoneRoundTrip() throws Exception {
  // should match "*_dt" dynamic field
  assertNotNull(h.getCore().getLatestSchema().getFieldOrNull("date_dt"));

  final String zonelessDate = "2010-11-12T13:14:15.168";
  final String zonedDate = zonelessDate + "Z";
  // zonelessDate interpreted as being in timeZone America/New_York, then printed as UTC
  final String zonelessDateAsUTC = "2010-11-12T18:14:15.168Z";

  SolrInputDocument processed = processAdd("parse-date-non-UTC-defaultTimeZone",
      doc(f("id", "99"), f("dateUTC_dt", zonedDate), f("dateNoTimeZone_dt", zonelessDate)));
  assertNotNull(processed);

  DateTime expectedZoned
      = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSZ").parseDateTime(zonedDate);
  Object zonedValue = processed.getFieldValue("dateUTC_dt");
  assertTrue(zonedValue instanceof Date);
  assertTrue(processed.getFieldValue("dateNoTimeZone_dt") instanceof Date);
  assertEquals(expectedZoned.getMillis(), ((Date) zonedValue).getTime());

  assertU(commit());
  assertQ(req("id:99")
      ,"//date[@name='dateUTC_dt'][.='" + zonedDate + "']"
      ,"//date[@name='dateNoTimeZone_dt'][.='" + zonelessDateAsUTC + "']");
}
/**
 * Checks that the "parse-date-explicit-not-in-schema-selector-no-run-processor" chain converts an
 * ISO-8601 string in a field that is absent from the schema into a {@link Date} for the same instant.
 */
public void testParseDateExplicitNotInSchemaSelector() throws Exception {
  final String fieldName = "not_in_schema";
  final String isoDate = "2010-11-12T13:14:15.168Z";

  assertNull(h.getCore().getLatestSchema().getFieldOrNull(fieldName));
  final long expectedMillis = ISODateTimeFormat.dateTime().parseDateTime(isoDate).getMillis();

  SolrInputDocument processed = processAdd(
      "parse-date-explicit-not-in-schema-selector-no-run-processor",
      doc(f("id", "88"), f(fieldName, isoDate)));
  assertNotNull(processed);

  Object parsed = processed.getFieldValue(fieldName);
  assertTrue(parsed instanceof Date);
  assertEquals(expectedMillis, ((Date) parsed).getTime());
}
/**
 * Checks that the "parse-date-explicit-typeclass-selector-no-run-processor" chain converts an
 * ISO-8601 string in the schema-known "date_dt" field into a {@link Date} for the same instant.
 */
public void testParseDateExplicitTypeClassSelector() throws Exception {
  final String fieldName = "date_dt";
  final String isoDate = "2010-11-12T13:14:15.168Z";

  assertNotNull(h.getCore().getLatestSchema().getFieldOrNull(fieldName));
  final long expectedMillis = ISODateTimeFormat.dateTime().parseDateTime(isoDate).getMillis();

  SolrInputDocument processed = processAdd(
      "parse-date-explicit-typeclass-selector-no-run-processor",
      doc(f("id", "77"), f(fieldName, isoDate)));
  assertNotNull(processed);

  Object parsed = processed.getFieldValue(fieldName);
  assertTrue(parsed instanceof Date);
  assertEquals(expectedMillis, ((Date) parsed).getTime());
}
/**
 * Checks that the "US-Pacific-parse-date-no-run-processor" chain parses the date-only string
 * "8/9/2010" into a {@link Date} whose UTC rendering is 2010-08-09T07:00:00.000Z.
 */
public void testParseUSPacificDate() throws Exception {
  final String fieldName = "not_in_schema";
  assertNull(h.getCore().getLatestSchema().getFieldOrNull(fieldName));

  // Interpreted as 00:00 US Pacific Daylight Time (UTC-07:00), which is 07:00 UTC
  final String pacificDate = "8/9/2010";
  final String expectedUTC = "2010-08-09T07:00:00.000Z";

  SolrInputDocument processed = processAdd("US-Pacific-parse-date-no-run-processor",
      doc(f("id", "288"), f(fieldName, pacificDate)));
  assertNotNull(processed);

  Object parsed = processed.getFieldValue(fieldName);
  assertTrue(parsed instanceof Date);
  DateTime parsedInUTC = new DateTime(((Date) parsed).getTime(), DateTimeZone.UTC);
  assertEquals(expectedUTC, parsedInUTC.toString());
}
/**
 * Feeds one sample per listed date format through the
 * "parse-date-UTC-defaultTimeZone-no-run-processor" chain and checks that every sample is turned
 * into a {@link Date} with the same millisecond value as the first sample.
 */
public void testParseDateFormats() throws Exception {
  // flat list of (format pattern, sample date string) pairs
  final String[] patternsAndSamples = {
    "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "2010-01-15T00:00:00.000Z",
    "yyyy-MM-dd'T'HH:mm:ss,SSSZ", "2010-01-15T00:00:00,000Z",
    "yyyy-MM-dd'T'HH:mm:ss.SSS", "2010-01-15T00:00:00.000",
    "yyyy-MM-dd'T'HH:mm:ss,SSS", "2010-01-15T00:00:00,000",
    "yyyy-MM-dd'T'HH:mm:ssZ", "2010-01-15T00:00:00Z",
    "yyyy-MM-dd'T'HH:mm:ss", "2010-01-15T00:00:00",
    "yyyy-MM-dd'T'HH:mmZ", "2010-01-15T00:00Z",
    "yyyy-MM-dd'T'HH:mm", "2010-01-15T00:00",
    "yyyy-MM-dd HH:mm:ss.SSSZ", "2010-01-15 00:00:00.000Z",
    "yyyy-MM-dd HH:mm:ss,SSSZ", "2010-01-15 00:00:00,000Z",
    "yyyy-MM-dd HH:mm:ss.SSS", "2010-01-15 00:00:00.000",
    "yyyy-MM-dd HH:mm:ss,SSS", "2010-01-15 00:00:00,000",
    "yyyy-MM-dd HH:mm:ssZ", "2010-01-15 00:00:00Z",
    "yyyy-MM-dd HH:mm:ss", "2010-01-15 00:00:00",
    "yyyy-MM-dd HH:mmZ", "2010-01-15 00:00Z",
    "yyyy-MM-dd HH:mm", "2010-01-15 00:00",
    "yyyy-MM-dd hh:mm a", "2010-01-15 12:00 AM",
    "yyyy-MM-dd hh:mma", "2010-01-15 12:00AM",
    "yyyy-MM-dd", "2010-01-15",
    "EEE MMM dd HH:mm:ss Z yyyy", "Fri Jan 15 00:00:00 +0000 2010",
    "EEE MMM dd HH:mm:ss yyyy Z", "Fri Jan 15 00:00:00 2010 +00:00",
    "EEE MMM dd HH:mm:ss yyyy", "Fri Jan 15 00:00:00 2010",
    "EEE, dd MMM yyyy HH:mm:ss Z", "Fri, 15 Jan 2010 00:00:00 +00:00",
    "EEEE, dd-MMM-yy HH:mm:ss Z", "Friday, 15-Jan-10 00:00:00 +00:00",
    "EEEE, MMMM dd, yyyy", "Friday, January 15, 2010",
    "MMMM dd, yyyy", "January 15, 2010",
    "MMM. dd, yyyy", "Jan. 15, 2010"
  };

  // should match "*_dt" dynamic field
  assertNotNull(h.getCore().getLatestSchema().getFieldOrNull("dateUTC_dt"));

  // every sample is compared against the instant of the first sample
  final long expectedMillis = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSSZ")
      .parseDateTime(patternsAndSamples[1]).getMillis();

  for (int i = 0; i < patternsAndSamples.length; i += 2) {
    final String pattern = patternsAndSamples[i];
    final String sample = patternsAndSamples[i + 1];

    SolrInputDocument processed = processAdd("parse-date-UTC-defaultTimeZone-no-run-processor",
        doc(f("id", "95" + i), f("dateUTC_dt", sample)));
    assertNotNull(processed);

    Object parsed = processed.getFieldValue("dateUTC_dt");
    assertTrue("date '" + sample + "' with format '" + pattern + "' is not mutated to a Date",
        parsed instanceof Date);
    assertEquals("date '" + sample + "' with format '" + pattern + "' mismatched milliseconds",
        expectedMillis, ((Date) parsed).getTime());
  }
}
/**
 * Checks that the "parse-french-date-UTC-defaultTimeZone-no-run-processor" chain parses a
 * French-language date string into a {@link Date} equal to 2010-01-15T00:00:00.000Z.
 */
public void testParseFrenchDate() throws Exception {
  final String fieldName = "not_in_schema";
  assertNull(h.getCore().getLatestSchema().getFieldOrNull(fieldName));

  final String frenchDate = "le vendredi 15 janvier 2010";
  final String equivalentIsoDate = "2010-01-15T00:00:00.000Z";
  final long expectedMillis
      = ISODateTimeFormat.dateTime().parseDateTime(equivalentIsoDate).getMillis();

  SolrInputDocument processed = processAdd(
      "parse-french-date-UTC-defaultTimeZone-no-run-processor",
      doc(f("id", "88"), f(fieldName, frenchDate)));
  assertNotNull(processed);

  Object parsed = processed.getFieldValue(fieldName);
  assertTrue(parsed instanceof Date);
  assertEquals(expectedMillis, ((Date) parsed).getTime());
}
/**
 * Verifies that the "parse-date-no-run-processor" chain leaves a multi-valued field untouched when
 * one of its values is not a String: the three date strings must stay Strings and the Double value
 * must still be present.
 */
public void testFailedParseMixedDate() throws Exception {
  IndexSchema schema = h.getCore().getLatestSchema();
  assertNull(schema.getFieldOrNull("not_in_schema"));
  DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateOptionalTimeParser().withZoneUTC();
  // maps the value each input would parse to -> the raw field value that is actually submitted
  Map<Object,Object> mixed = new HashMap<Object,Object>();
  String[] dateStrings = { "2020-05-13T18:47", "1989-12-14", "1682-07-22T18:33:00.000Z" };
  for (String dateString : dateStrings) {
    mixed.put(dateTimeFormatter.parseDateTime(dateString).toDate(), dateString);
  }
  Double extraDouble = 29.554d;
  mixed.put(extraDouble, extraDouble); // Double-typed field value
  SolrInputDocument d = processAdd("parse-date-no-run-processor",
      doc(f("id", "7201"), f("not_in_schema", mixed.values())));
  assertNotNull(d);
  boolean foundDouble = false;
  for (Object o : d.getFieldValues("not_in_schema")) {
    // compare boxed values with equals(): == would test reference identity, not the value
    if (extraDouble.equals(o)) {
      foundDouble = true;
    } else {
      assertTrue(o instanceof String);
    }
    // tick off each submitted value; the map must end up empty
    mixed.values().remove(o);
  }
  assertTrue(foundDouble);
  assertTrue(mixed.isEmpty());
}
/**
 * Sends a plain and a comma-grouped integer string in "*_i" fields through the "parse-int" chain,
 * checks that both became Integers with the expected value, then commits and verifies the stored
 * document reports that value for both fields.
 */
public void testParseIntRoundTrip() throws Exception {
  IndexSchema latestSchema = h.getCore().getLatestSchema();
  assertNotNull(latestSchema.getFieldOrNull("int1_i")); // should match dynamic field "*_i"
  assertNotNull(latestSchema.getFieldOrNull("int2_i")); // should match dynamic field "*_i"

  final int expected = 1089883491;
  final String plainDigits = "1089883491";
  final String groupedDigits = "1,089,883,491";

  SolrInputDocument processed = processAdd("parse-int",
      doc(f("id", "113"), f("int1_i", plainDigits), f("int2_i", groupedDigits)));
  assertNotNull(processed);

  Object first = processed.getFieldValue("int1_i");
  assertTrue(first instanceof Integer);
  assertEquals(expected, ((Integer) first).intValue());

  Object second = processed.getFieldValue("int2_i");
  assertTrue(second instanceof Integer);
  assertEquals(expected, ((Integer) second).intValue());

  assertU(commit());
  assertQ(req("id:113")
      ,"//int[@name='int1_i'][.='" + expected + "']"
      ,"//int[@name='int2_i'][.='" + expected + "']");
}
/**
 * Checks that the "parse-int-russian-no-run-processor" chain parses both an ungrouped integer
 * string and one whose digit groups are separated by no-break spaces into Integers with the
 * expected value.
 */
public void testParseIntNonRootLocale() throws Exception {
  IndexSchema schema = h.getCore().getLatestSchema();
  assertNotNull(schema.getFieldOrNull("int_i")); // should match dynamic field "*_i"
  assertNull(schema.getFieldOrNull("not_in_schema"));
  int value = 1089883491;
  String intString1 = "1089883491";
  // The group separator is U+00A0 NO-BREAK SPACE. It is written as an escape so that it cannot be
  // confused with, or silently converted to, an ordinary space by editors or encoding changes.
  String intString2 = "1\u00A0089\u00A0883\u00A0491";
  SolrInputDocument d = processAdd("parse-int-russian-no-run-processor",
      doc(f("id", "113"), f("int_i", intString1), f("not_in_schema", intString2)));
  assertNotNull(d);
  assertTrue(d.getFieldValue("int_i") instanceof Integer);
  assertEquals(value, ((Integer)d.getFieldValue("int_i")).intValue());
  assertTrue(d.getFieldValue("not_in_schema") instanceof Integer);
  assertEquals(value, ((Integer)d.getFieldValue("not_in_schema")).intValue());
}
/**
 * Same round trip as the plain int test, but against "*_ti" fields: the "parse-int" chain must
 * turn a plain and a comma-grouped integer string into Integers, and the committed document must
 * report the expected value for both fields.
 */
public void testParseTrieIntRoundTrip() throws Exception {
  IndexSchema latestSchema = h.getCore().getLatestSchema();
  assertNotNull(latestSchema.getFieldOrNull("int1_ti")); // should match dynamic field "*_ti"
  assertNotNull(latestSchema.getFieldOrNull("int2_ti")); // should match dynamic field "*_ti"

  final int expected = 1089883491;
  final String plainDigits = "1089883491";
  final String groupedDigits = "1,089,883,491";

  SolrInputDocument processed = processAdd("parse-int",
      doc(f("id", "113"), f("int1_ti", plainDigits), f("int2_ti", groupedDigits)));
  assertNotNull(processed);

  Object first = processed.getFieldValue("int1_ti");
  assertTrue(first instanceof Integer);
  assertEquals(expected, ((Integer) first).intValue());

  Object second = processed.getFieldValue("int2_ti");
  assertTrue(second instanceof Integer);
  assertEquals(expected, ((Integer) second).intValue());

  assertU(commit());
  assertQ(req("id:113")
      ,"//int[@name='int1_ti'][.='" + expected + "']"
      ,"//int[@name='int2_ti'][.='" + expected + "']");
}
/**
 * Checks that the "parse-int-no-run-processor" chain leaves values as Strings when they lie
 * outside the int range (100 above Integer.MAX_VALUE and 100 below Integer.MIN_VALUE).
 */
public void testIntOverflow() throws Exception {
  IndexSchema latestSchema = h.getCore().getLatestSchema();
  assertNull(latestSchema.getFieldOrNull("not_in_schema1"));
  assertNull(latestSchema.getFieldOrNull("not_in_schema2"));

  final long aboveIntMax = (long) Integer.MAX_VALUE + 100L;
  final long belowIntMin = (long) Integer.MIN_VALUE - 100L;
  final String tooLarge = Long.toString(aboveIntMax);
  final String tooSmall = Long.toString(belowIntMin);

  SolrInputDocument processed = processAdd("parse-int-no-run-processor",
      doc(f("id", "282"), f("not_in_schema1", tooLarge), f("not_in_schema2", tooSmall)));
  assertNotNull(processed);

  assertTrue(processed.getFieldValue("not_in_schema1") instanceof String);
  assertTrue(processed.getFieldValue("not_in_schema2") instanceof String);
}
/**
 * Verifies that the "parse-int-no-run-processor" chain leaves a multi-valued field untouched when
 * one of its values is not a String: the integer strings must stay Strings and the Float value
 * must still be present.
 */
public void testFailedParseMixedInt() throws Exception {
  IndexSchema schema = h.getCore().getLatestSchema();
  assertNull(schema.getFieldOrNull("not_in_schema"));
  // maps the value each input would parse to -> the raw field value that is actually submitted
  Map<Object,Object> mixed = new HashMap<Object,Object>();
  Float floatVal = 294423.0f;
  mixed.put(85, "85");
  mixed.put(floatVal, floatVal); // Float-typed field value
  mixed.put(-2894518, "-2,894,518");
  mixed.put(1879472193, "1,879,472,193");
  SolrInputDocument d = processAdd("parse-int-no-run-processor",
      doc(f("id", "7202"), f("not_in_schema", mixed.values())));
  assertNotNull(d);
  boolean foundFloat = false;
  for (Object o : d.getFieldValues("not_in_schema")) {
    // compare boxed values with equals(): == would test reference identity, not the value
    if (floatVal.equals(o)) {
      foundFloat = true;
    } else {
      assertTrue(o instanceof String);
    }
    // tick off each submitted value; the map must end up empty
    mixed.values().remove(o);
  }
  assertTrue(foundFloat);
  assertTrue(mixed.isEmpty());
}
/**
 * Sends a plain and a comma-grouped integer string in "*_l" fields through the "parse-long" chain,
 * checks that both became Longs with the expected value, then commits and verifies the stored
 * document reports that value for both fields.
 */
public void testParseLongRoundTrip() throws Exception {
  IndexSchema latestSchema = h.getCore().getLatestSchema();
  assertNotNull(latestSchema.getFieldOrNull("long1_l")); // should match dynamic field "*_l"
  assertNotNull(latestSchema.getFieldOrNull("long2_l")); // should match dynamic field "*_l"

  final long expected = 1089883491L;
  final String plainDigits = "1089883491";
  final String groupedDigits = "1,089,883,491";

  SolrInputDocument processed = processAdd("parse-long",
      doc(f("id", "113"), f("long1_l", plainDigits), f("long2_l", groupedDigits)));
  assertNotNull(processed);

  Object first = processed.getFieldValue("long1_l");
  assertTrue(first instanceof Long);
  assertEquals(expected, ((Long) first).longValue());

  Object second = processed.getFieldValue("long2_l");
  assertTrue(second instanceof Long);
  assertEquals(expected, ((Long) second).longValue());

  assertU(commit());
  assertQ(req("id:113")
      ,"//long[@name='long1_l'][.='" + expected + "']"
      ,"//long[@name='long2_l'][.='" + expected + "']");
}
/**
 * Checks that the "parse-long-russian-no-run-processor" chain parses both an ungrouped integer
 * string and one whose digit groups are separated by no-break spaces into Longs with the
 * expected value.
 */
public void testParseLongNonRootLocale() throws Exception {
  IndexSchema schema = h.getCore().getLatestSchema();
  assertNotNull(schema.getFieldOrNull("long_l")); // should match dynamic field "*_l"
  assertNull(schema.getFieldOrNull("not_in_schema"));
  long value = 1089883491L;
  String longString1 = "1089883491";
  // The group separator is U+00A0 NO-BREAK SPACE. It is written as an escape so that it cannot be
  // confused with, or silently converted to, an ordinary space by editors or encoding changes.
  String longString2 = "1\u00A0089\u00A0883\u00A0491";
  SolrInputDocument d = processAdd("parse-long-russian-no-run-processor",
      doc(f("id", "113"), f("long_l", longString1), f("not_in_schema", longString2)));
  assertNotNull(d);
  assertTrue(d.getFieldValue("long_l") instanceof Long);
  assertEquals(value, ((Long)d.getFieldValue("long_l")).longValue());
  assertTrue(d.getFieldValue("not_in_schema") instanceof Long);
  assertEquals(value, ((Long)d.getFieldValue("not_in_schema")).longValue());
}
/**
 * Same round trip as the plain long test, but against "*_tl" fields: the "parse-long" chain must
 * turn a plain and a comma-grouped integer string into Longs, and the committed document must
 * report the expected value for both fields.
 */
public void testParseTrieLongRoundTrip() throws Exception {
  IndexSchema latestSchema = h.getCore().getLatestSchema();
  assertNotNull(latestSchema.getFieldOrNull("long1_tl")); // should match dynamic field "*_tl"
  assertNotNull(latestSchema.getFieldOrNull("long2_tl")); // should match dynamic field "*_tl"

  final long expected = 1089883491L;
  final String plainDigits = "1089883491";
  final String groupedDigits = "1,089,883,491";

  SolrInputDocument processed = processAdd("parse-long",
      doc(f("id", "113"), f("long1_tl", plainDigits), f("long2_tl", groupedDigits)));
  assertNotNull(processed);

  Object first = processed.getFieldValue("long1_tl");
  assertTrue(first instanceof Long);
  assertEquals(expected, ((Long) first).longValue());

  Object second = processed.getFieldValue("long2_tl");
  assertTrue(second instanceof Long);
  assertEquals(expected, ((Long) second).longValue());

  assertU(commit());
  assertQ(req("id:113")
      ,"//long[@name='long1_tl'][.='" + expected + "']"
      ,"//long[@name='long2_tl'][.='" + expected + "']");
}
/**
 * Verifies that the "parse-long-no-run-processor" chain leaves a multi-valued field untouched when
 * one of its values is not a String: the integer strings must stay Strings and the Float value
 * must still be present.
 */
public void testFailedParseMixedLong() throws Exception {
  IndexSchema schema = h.getCore().getLatestSchema();
  assertNull(schema.getFieldOrNull("not_in_schema"));
  // maps the value each input would parse to -> the raw field value that is actually submitted
  Map<Object,Object> mixed = new HashMap<Object,Object>();
  Float floatVal = 294423.0f;
  mixed.put(85L, "85");
  mixed.put(floatVal, floatVal); // Float-typed field value
  mixed.put(-2894518L, "-2,894,518");
  mixed.put(1879472193L, "1,879,472,193");
  SolrInputDocument d = processAdd("parse-long-no-run-processor",
      doc(f("id", "7204"), f("not_in_schema", mixed.values())));
  assertNotNull(d);
  boolean foundFloat = false;
  for (Object o : d.getFieldValues("not_in_schema")) {
    // compare boxed values with equals(): == would test reference identity, not the value
    if (floatVal.equals(o)) {
      foundFloat = true;
    } else {
      assertTrue(o instanceof String);
    }
    // tick off each submitted value; the map must end up empty
    mixed.values().remove(o);
  }
  assertTrue(foundFloat);
  assertTrue(mixed.isEmpty());
}
/**
 * Sends a plain and a comma-grouped decimal string in "*_f" fields through the "parse-float"
 * chain, checks that both became Floats equal to the expected value within EPSILON, then commits
 * and verifies the stored document reports that value for both fields.
 */
public void testParseFloatRoundTrip() throws Exception {
  IndexSchema latestSchema = h.getCore().getLatestSchema();
  assertNotNull(latestSchema.getFieldOrNull("float1_f")); // should match dynamic field "*_f"
  assertNotNull(latestSchema.getFieldOrNull("float2_f")); // should match dynamic field "*_f"

  final float expected = 10898.83491f;
  final String plainDecimal = "10898.83491";
  final String groupedDecimal = "10,898.83491";

  SolrInputDocument processed = processAdd("parse-float",
      doc(f("id", "128"), f("float1_f", plainDecimal), f("float2_f", groupedDecimal)));
  assertNotNull(processed);

  Object first = processed.getFieldValue("float1_f");
  assertTrue(first instanceof Float);
  assertEquals(expected, (Float) first, EPSILON);

  Object second = processed.getFieldValue("float2_f");
  assertTrue(second instanceof Float);
  assertEquals(expected, (Float) second, EPSILON);

  assertU(commit());
  assertQ(req("id:128")
      ,"//float[@name='float1_f'][.='" + expected + "']"
      ,"//float[@name='float2_f'][.='" + expected + "']");
}
/**
 * Checks that the "parse-float-french-no-run-processor" chain parses decimal-comma strings, with
 * and without a no-break space between digit groups, into Floats equal to the expected value
 * within EPSILON.
 */
public void testParseFloatNonRootLocale() throws Exception {
  IndexSchema schema = h.getCore().getLatestSchema();
  assertNotNull(schema.getFieldOrNull("float_f")); // should match dynamic field "*_f"
  assertNull(schema.getFieldOrNull("not_in_schema"));
  float value = 10898.83491f;
  String floatString1 = "10898,83491";
  // The group separator is U+00A0 NO-BREAK SPACE. It is written as an escape so that it cannot be
  // confused with, or silently converted to, an ordinary space by editors or encoding changes.
  String floatString2 = "10\u00A0898,83491";
  SolrInputDocument d = processAdd("parse-float-french-no-run-processor",
      doc(f("id", "140"), f("float_f", floatString1),
          f("not_in_schema", floatString2)));
  assertNotNull(d);
  assertTrue(d.getFieldValue("float_f") instanceof Float);
  assertEquals(value, (Float)d.getFieldValue("float_f"), EPSILON);
  assertTrue(d.getFieldValue("not_in_schema") instanceof Float);
  assertEquals(value, (Float)d.getFieldValue("not_in_schema"), EPSILON);
}
/**
 * Same round trip as the plain float test, but against "*_tf" fields: the "parse-float" chain must
 * turn a plain and a comma-grouped decimal string into Floats, and the committed document must
 * report the expected value for both fields.
 */
public void testParseTrieFloatRoundTrip() throws Exception {
  IndexSchema latestSchema = h.getCore().getLatestSchema();
  assertNotNull(latestSchema.getFieldOrNull("float1_tf")); // should match dynamic field "*_tf"
  assertNotNull(latestSchema.getFieldOrNull("float2_tf")); // should match dynamic field "*_tf"

  final float expected = 10898.83491f;
  final String plainDecimal = "10898.83491";
  final String groupedDecimal = "10,898.83491";

  SolrInputDocument processed = processAdd("parse-float",
      doc(f("id", "728"), f("float1_tf", plainDecimal), f("float2_tf", groupedDecimal)));
  assertNotNull(processed);

  Object first = processed.getFieldValue("float1_tf");
  assertTrue(first instanceof Float);
  assertEquals(expected, (Float) first, EPSILON);

  Object second = processed.getFieldValue("float2_tf");
  assertTrue(second instanceof Float);
  assertEquals(expected, (Float) second, EPSILON);

  assertU(commit());
  assertQ(req("id:728")
      ,"//float[@name='float1_tf'][.='" + expected + "']"
      ,"//float[@name='float2_tf'][.='" + expected + "']");
}
/**
 * Checks that the "parse-float-no-run-processor" chain turns a multi-valued "*_tf" field holding
 * three numeric strings and one Float into Floats only, and that every expected Float value is
 * among the results.
 */
public void testMixedFloats() throws Exception {
  // should match dynamic field "*_tf"
  assertNotNull(h.getCore().getLatestSchema().getFieldOrNull("float_tf"));

  // expected Float -> raw field value that is submitted
  Map<Float,Object> expectedToRaw = new HashMap<Float,Object>();
  expectedToRaw.put(85.0f, "85");
  expectedToRaw.put(2894518.0f, "2,894,518");
  expectedToRaw.put(2.94423E-9f, 2.94423E-9f); // Float-typed field value
  expectedToRaw.put(48794721.937f, "48,794,721.937");

  SolrInputDocument processed = processAdd("parse-float-no-run-processor",
      doc(f("id", "342"), f("float_tf", expectedToRaw.values())));
  assertNotNull(processed);

  for (Object value : processed.getFieldValues("float_tf")) {
    assertTrue(value instanceof Float);
    expectedToRaw.remove(value);
  }
  // every expected key must have been matched by a produced value
  assertTrue(expectedToRaw.isEmpty());
}
/**
 * Verifies that the "parse-float-no-run-processor" chain leaves a multi-valued field untouched
 * when one of its values is not a String: the numeric strings must stay Strings and the Long value
 * must still be present.
 */
public void testFailedParseMixedFloat() throws Exception {
  IndexSchema schema = h.getCore().getLatestSchema();
  assertNull(schema.getFieldOrNull("not_in_schema"));
  // the keys only keep the entries distinct; the values are what is submitted
  Map<Object,Object> mixed = new HashMap<Object,Object>();
  Long longVal = 294423L;
  mixed.put(85L, "85");
  mixed.put(longVal, longVal); // Long-typed field value
  mixed.put(-2894518L, "-2,894,518");
  mixed.put(1879472193L, "1,879,472,193");
  SolrInputDocument d = processAdd("parse-float-no-run-processor",
      doc(f("id", "7205"), f("not_in_schema", mixed.values())));
  assertNotNull(d);
  boolean foundLong = false;
  for (Object o : d.getFieldValues("not_in_schema")) {
    // compare boxed values with equals(): == would test reference identity, not the value
    if (longVal.equals(o)) {
      foundLong = true;
    } else {
      assertTrue(o instanceof String);
    }
    // tick off each submitted value; the map must end up empty
    mixed.values().remove(o);
  }
  assertTrue(foundLong);
  assertTrue(mixed.isEmpty());
}
/**
 * Sends a plain and a comma-grouped decimal string in "*_d" fields through the "parse-double"
 * chain, checks that both became Doubles equal to the expected value within EPSILON, then commits
 * and verifies the stored document reports that value for both fields.
 */
public void testParseDoubleRoundTrip() throws Exception {
  IndexSchema latestSchema = h.getCore().getLatestSchema();
  assertNotNull(latestSchema.getFieldOrNull("double1_d")); // should match dynamic field "*_d"
  assertNotNull(latestSchema.getFieldOrNull("double2_d")); // should match dynamic field "*_d"

  final double expected = 10898.83491;
  final String plainDecimal = "10898.83491";
  final String groupedDecimal = "10,898.83491";

  SolrInputDocument processed = processAdd("parse-double",
      doc(f("id", "128"), f("double1_d", plainDecimal), f("double2_d", groupedDecimal)));
  assertNotNull(processed);

  Object first = processed.getFieldValue("double1_d");
  assertTrue(first instanceof Double);
  assertEquals(expected, (Double) first, EPSILON);

  Object second = processed.getFieldValue("double2_d");
  assertTrue(second instanceof Double);
  assertEquals(expected, (Double) second, EPSILON);

  assertU(commit());
  assertQ(req("id:128")
      ,"//double[@name='double1_d'][.='" + expected + "']"
      ,"//double[@name='double2_d'][.='" + expected + "']");
}
/**
 * Checks that the "parse-double-french-no-run-processor" chain parses decimal-comma strings, with
 * and without a no-break space between digit groups, into Doubles equal to the expected value
 * within EPSILON.
 */
public void testParseDoubleNonRootLocale() throws Exception {
  IndexSchema schema = h.getCore().getLatestSchema();
  assertNotNull(schema.getFieldOrNull("double_d")); // should match dynamic field "*_d"
  assertNull(schema.getFieldOrNull("not_in_schema"));
  double value = 10898.83491;
  String doubleString1 = "10898,83491";
  // The group separator is U+00A0 NO-BREAK SPACE. It is written as an escape so that it cannot be
  // confused with, or silently converted to, an ordinary space by editors or encoding changes.
  String doubleString2 = "10\u00A0898,83491";
  SolrInputDocument d = processAdd("parse-double-french-no-run-processor",
      doc(f("id", "140"), f("double_d", doubleString1),
          f("not_in_schema", doubleString2)));
  assertNotNull(d);
  assertTrue(d.getFieldValue("double_d") instanceof Double);
  assertEquals(value, (Double)d.getFieldValue("double_d"), EPSILON);
  assertTrue(d.getFieldValue("not_in_schema") instanceof Double);
  assertEquals(value, (Double)d.getFieldValue("not_in_schema"), EPSILON);
}
/**
 * Same round trip as the plain double test, but against "*_td" fields: the "parse-double" chain
 * must turn a plain and a comma-grouped decimal string into Doubles, and the committed document
 * must report the expected value for both fields.
 */
public void testParseTrieDoubleRoundTrip() throws Exception {
  IndexSchema latestSchema = h.getCore().getLatestSchema();
  assertNotNull(latestSchema.getFieldOrNull("double1_td")); // should match dynamic field "*_td"
  assertNotNull(latestSchema.getFieldOrNull("double2_td")); // should match dynamic field "*_td"

  final double expected = 10898.83491;
  final String plainDecimal = "10898.83491";
  final String groupedDecimal = "10,898.83491";

  SolrInputDocument processed = processAdd("parse-double",
      doc(f("id", "728"), f("double1_td", plainDecimal), f("double2_td", groupedDecimal)));
  assertNotNull(processed);

  Object first = processed.getFieldValue("double1_td");
  assertTrue(first instanceof Double);
  assertEquals(expected, (Double) first, EPSILON);

  Object second = processed.getFieldValue("double2_td");
  assertTrue(second instanceof Double);
  assertEquals(expected, (Double) second, EPSILON);

  assertU(commit());
  assertQ(req("id:728")
      ,"//double[@name='double1_td'][.='" + expected + "']"
      ,"//double[@name='double2_td'][.='" + expected + "']");
}
/**
 * Verifies that the "parse-double-no-run-processor" chain leaves a multi-valued field untouched
 * when one of its values is not a String: the numeric strings must stay Strings and the Long value
 * must still be present.
 */
public void testFailedParseMixedDouble() throws Exception {
  IndexSchema schema = h.getCore().getLatestSchema();
  assertNull(schema.getFieldOrNull("not_in_schema"));
  // maps the value each input would parse to -> the raw field value that is actually submitted
  Map<Object,Object> mixed = new HashMap<Object,Object>();
  Long longVal = 294423L;
  mixed.put(85.0, "85.0"); // Double key, consistent with the other parsed-value keys
  mixed.put(longVal, longVal); // Long-typed field value
  mixed.put(-2894.518, "-2,894.518");
  mixed.put(187947.2193, "187,947.2193");
  SolrInputDocument d = processAdd("parse-double-no-run-processor",
      doc(f("id", "7206"), f("not_in_schema", mixed.values())));
  assertNotNull(d);
  boolean foundLong = false;
  for (Object o : d.getFieldValues("not_in_schema")) {
    // compare boxed values with equals(): == would test reference identity, not the value
    if (longVal.equals(o)) {
      foundLong = true;
    } else {
      assertTrue(o instanceof String);
    }
    // tick off each submitted value; the map must end up empty
    mixed.values().remove(o);
  }
  assertTrue(foundLong);
  assertTrue(mixed.isEmpty());
}
/**
 * Sends a true and a false value in "*_b" fields through the "parse-boolean" chain, checks that
 * the resulting field values are Booleans with the same values, then commits and verifies the
 * stored document reports them.
 */
public void testParseBooleanRoundTrip() throws Exception {
  IndexSchema latestSchema = h.getCore().getLatestSchema();
  assertNotNull(latestSchema.getFieldOrNull("boolean1_b")); // should match dynamic field "*_b"
  assertNotNull(latestSchema.getFieldOrNull("boolean2_b")); // should match dynamic field "*_b"

  // NOTE(review): the inputs are booleans (boxed by f()), not Strings, so this test may not
  // actually exercise String parsing - confirm whether "true"/"false" strings were intended.
  final boolean expectedFirst = true;
  final boolean expectedSecond = false;

  SolrInputDocument processed = processAdd("parse-boolean",
      doc(f("id", "141"), f("boolean1_b", expectedFirst), f("boolean2_b", expectedSecond)));
  assertNotNull(processed);

  Object first = processed.getFieldValue("boolean1_b");
  assertTrue(first instanceof Boolean);
  assertEquals(expectedFirst, first);

  Object second = processed.getFieldValue("boolean2_b");
  assertTrue(second instanceof Boolean);
  assertEquals(expectedSecond, second);

  assertU(commit());
  assertQ(req("id:141")
      ,"//bool[@name='boolean1_b'][.='" + expectedFirst + "']"
      ,"//bool[@name='boolean2_b'][.='" + expectedSecond + "']");
}
/**
 * Checks that the "parse-boolean-alternate-values-no-run-processor" chain maps the strings
 * "on", "yes", "True" to true and "Off", "no", "FALSE" to false, for five "*_b" fields and one
 * field that is absent from the schema.
 */
public void testParseAlternateValueBooleans() throws Exception {
  final String[] fieldNames = { "boolean1_b", "boolean2_b", "boolean3_b", "boolean4_b", "boolean5_b", "not_in_schema" };
  final String[] rawValues = { "on", "yes", "True", "Off", "no", "FALSE" };
  final boolean[] expected = { true, true, true, false, false, false };

  IndexSchema latestSchema = h.getCore().getLatestSchema();
  final int lastIndex = fieldNames.length - 1;
  for (int i = 0; i < lastIndex; i++) {
    assertNotNull(latestSchema.getFieldOrNull(fieldNames[i])); // should match dynamic field "*_b"
  }
  assertNull(latestSchema.getFieldOrNull(fieldNames[lastIndex]));

  SolrInputDocument input = doc(f("id", "55"));
  for (int i = 0; i < expected.length; i++) {
    input.addField(fieldNames[i], rawValues[i]);
  }

  SolrInputDocument processed = processAdd("parse-boolean-alternate-values-no-run-processor", input);
  assertNotNull(processed);
  for (int i = 0; i < expected.length; i++) {
    Object parsed = processed.getFieldValue(fieldNames[i]);
    assertTrue(parsed instanceof Boolean);
    assertEquals(expected[i], parsed);
  }
}
/**
 * Checks that the "parse-boolean-alternate-single-values-no-run-processor" chain maps "yup" to
 * true and "nope" to false, and that it leaves the standard strings "true" and "false" as Strings.
 */
public void testParseAlternateSingleValuesBooleans() throws Exception {
  final String chain = "parse-boolean-alternate-single-values-no-run-processor";
  final String[] fieldNames = { "boolean1_b", "boolean2_b" };
  final boolean[] expected = { true, false };

  IndexSchema latestSchema = h.getCore().getLatestSchema();
  for (String fieldName : fieldNames) {
    assertNotNull(latestSchema.getFieldOrNull(fieldName)); // should match dynamic field "*_b"
  }

  // the configured alternate values are converted
  final String[] alternateValues = { "yup", "nope" };
  SolrInputDocument input = doc(f("id", "59"));
  for (int i = 0; i < expected.length; i++) {
    input.addField(fieldNames[i], alternateValues[i]);
  }
  SolrInputDocument processed = processAdd(chain, input);
  assertNotNull(processed);
  for (int i = 0; i < expected.length; i++) {
    Object parsed = processed.getFieldValue(fieldNames[i]);
    assertTrue(parsed instanceof Boolean);
    assertEquals(expected[i], parsed);
  }

  // Standard boolean values should not be mutated, since they're not configured
  final String[] standardValues = { "true", "false" };
  input = doc(f("id", "593"));
  for (int i = 0; i < expected.length; i++) {
    input.addField(fieldNames[i], standardValues[i]);
  }
  processed = processAdd(chain, input);
  assertNotNull(processed);
  for (int i = 0; i < expected.length; i++) {
    assertTrue(processed.getFieldValue(fieldNames[i]) instanceof String);
  }
}
/**
 * Verifies that the "parse-boolean-no-run-processor" chain leaves a multi-valued field untouched
 * when one of its values is not a String: the boolean strings must stay Strings and the Long value
 * must still be present.
 */
public void testFailedParseMixedBoolean() throws Exception {
  IndexSchema schema = h.getCore().getLatestSchema();
  assertNull(schema.getFieldOrNull("not_in_schema"));
  // maps the value each input would parse to -> the raw field value that is actually submitted
  Map<Object,Object> mixed = new HashMap<Object,Object>();
  Long longVal = 294423L;
  // a second put(true, "true") would only overwrite this entry, so each key is added once
  mixed.put(true, "true");
  mixed.put(longVal, longVal); // Long-typed field value
  mixed.put(false, "false");
  SolrInputDocument d = processAdd("parse-boolean-no-run-processor",
      doc(f("id", "7207"), f("not_in_schema", mixed.values())));
  assertNotNull(d);
  boolean foundLong = false;
  for (Object o : d.getFieldValues("not_in_schema")) {
    // compare boxed values with equals(): == would test reference identity, not the value
    if (longVal.equals(o)) {
      foundLong = true;
    } else {
      assertTrue(o instanceof String);
    }
    // tick off each submitted value; the map must end up empty
    mixed.values().remove(o);
  }
  assertTrue(foundLong);
  assertTrue(mixed.isEmpty());
}
/**
 * Runs several batches of values through the {@code cascading-parsers-no-run-processor}
 * chain, always targeting a field that is absent from the schema, and asserts the Java type
 * (and, where the expected values are known, the exact values) of what comes back.
 *
 * <p>Most sections share one pattern: a map from the expected parsed value to the raw input is
 * built, the map's values are submitted as the field values, and each returned value is removed
 * from the map by key. An empty map at the end means every expected value was produced.
 */
public void testCascadingParsers() throws Exception {
  IndexSchema schema = h.getCore().getLatestSchema();
  final String fieldName = "not_in_schema";
  assertNull(schema.getFieldOrNull(fieldName));
  SolrInputDocument d = null;
  String chain = "cascading-parsers-no-run-processor";

  // Boolean strings, in mixed case.
  Map<Boolean,String> booleans = new HashMap<Boolean,String>();
  booleans.put(true, "truE");
  booleans.put(false, "False");
  d = processAdd(chain, doc(f("id", "341"), f(fieldName, booleans.values())));
  assertNotNull(d);
  for (Object o : d.getFieldValues(fieldName)) {
    assertTrue(o instanceof Boolean);
    booleans.remove(o);
  }
  assertTrue(booleans.isEmpty());

  // Integer strings, with and without grouping separators.
  Map<Integer,String> ints = new HashMap<Integer,String>();
  ints.put(2, "2");
  ints.put(50928, "50928");
  ints.put(86942008, "86,942,008");
  d = processAdd(chain, doc(f("id", "333"), f(fieldName, ints.values())));
  assertNotNull(d);
  for (Object o : d.getFieldValues(fieldName)) {
    assertTrue(o instanceof Integer);
    ints.remove(o);
  }
  assertTrue(ints.isEmpty());

  // One value exceeds the int range, so the whole field is expected to come back as Long.
  Map<Long,String> longs = new HashMap<Long,String>();
  longs.put(2L, "2");
  longs.put(50928L, "50928");
  longs.put(86942008987654L, "86,942,008,987,654");
  d = processAdd(chain, doc(f("id", "342"), f(fieldName, longs.values())));
  assertNotNull(d);
  for (Object o : d.getFieldValues(fieldName)) {
    assertTrue(o instanceof Long);
    longs.remove(o);
  }
  assertTrue(longs.isEmpty());

  // Float parsing is deliberately not exercised: unlike Integer/Long, Float parsing can
  // round values to make them fit, so the parsed value cannot be compared exactly.
  // The disabled section is kept for reference.
  /*
  Map<Float,String> floats = new HashMap<Float,String>();
  floats.put(2.0f, "2.");
  floats.put(509.28f, "509.28");
  floats.put(86942.008f, "86,942.008");
  d = processAdd(chain, doc(f("id", "342"), f(fieldName, floats.values())));
  assertNotNull(d);
  for (Object o : d.getFieldValues(fieldName)) {
    assertTrue(o instanceof Float);
    floats.remove(o);
  }
  */

  // Decimal strings.
  Map<Double,String> doubles = new HashMap<Double,String>();
  doubles.put(2.0, "2.");
  doubles.put(509.28, "509.28");
  doubles.put(86942.008, "86,942.008");
  d = processAdd(chain, doc(f("id", "342"), f(fieldName, doubles.values())));
  assertNotNull(d);
  for (Object o : d.getFieldValues(fieldName)) {
    assertTrue(o instanceof Double);
    // Remove from the doubles map (not the already-empty longs map) so the values are checked.
    doubles.remove(o);
  }
  assertTrue(doubles.isEmpty());

  // ISO-8601 date strings; the expected Date keys are computed with the same UTC formatter.
  DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateOptionalTimeParser().withZoneUTC();
  Map<Date,String> dates = new HashMap<Date,String>();
  String[] dateStrings = { "2020-05-13T18:47", "1989-12-14", "1682-07-22T18:33:00.000Z" };
  for (String dateString : dateStrings) {
    dates.put(dateTimeFormatter.parseDateTime(dateString).toDate(), dateString);
  }
  d = processAdd(chain, doc(f("id", "343"), f(fieldName, dates.values())));
  assertNotNull(d);
  for (Object o : d.getFieldValues(fieldName)) {
    assertTrue(o instanceof Date);
    dates.remove(o);
  }
  assertTrue(dates.isEmpty());

  // Integral and fractional strings together: every value is expected to come back as Double.
  Map<Double,String> mixedLongsAndDoubles = new LinkedHashMap<Double,String>(); // preserve order
  mixedLongsAndDoubles.put(85.0, "85");
  mixedLongsAndDoubles.put(2.94423E-9, "2.94423E-9");
  mixedLongsAndDoubles.put(2894518.0, "2,894,518");
  mixedLongsAndDoubles.put(48794721.937, "48,794,721.937");
  d = processAdd(chain, doc(f("id", "344"), f(fieldName, mixedLongsAndDoubles.values())));
  assertNotNull(d);
  for (Object o : d.getFieldValues(fieldName)) {
    assertTrue(o instanceof Double);
    mixedLongsAndDoubles.remove(o);
  }
  assertTrue(mixedLongsAndDoubles.isEmpty());

  // Strings of incompatible kinds (boolean, date, numbers): all values must stay Strings.
  Set<String> mixed = new HashSet<String>();
  mixed.add("true");
  mixed.add("1682-07-22T18:33:00.000Z");
  mixed.add("2,894,518");
  mixed.add("308,393,131,379,900");
  mixed.add("48,794,721.937");
  d = processAdd(chain, doc(f("id", "345"), f(fieldName, mixed)));
  assertNotNull(d);
  for (Object o : d.getFieldValues(fieldName)) {
    assertTrue(o instanceof String);
  }

  // The remaining sections mix Strings with one value that already has the target type.
  Map<Double,Object> mixedDoubles = new LinkedHashMap<Double,Object>(); // preserve order
  mixedDoubles.put(85.0, "85");
  mixedDoubles.put(2.94423E-9, 2.94423E-9); // Double-typed field value
  mixedDoubles.put(2894518.0, "2,894,518");
  mixedDoubles.put(48794721.937, "48,794,721.937");
  d = processAdd(chain, doc(f("id", "3391"), f(fieldName, mixedDoubles.values())));
  assertNotNull(d);
  for (Object o : d.getFieldValues(fieldName)) {
    assertTrue(o instanceof Double);
    mixedDoubles.remove(o);
  }
  assertTrue(mixedDoubles.isEmpty());

  Map<Integer,Object> mixedInts = new LinkedHashMap<Integer,Object>(); // preserve order
  mixedInts.put(85, "85");
  mixedInts.put(294423, 294423); // Integer-typed field value
  mixedInts.put(-2894518, "-2,894,518");
  mixedInts.put(1879472193, "1,879,472,193");
  d = processAdd(chain, doc(f("id", "3392"), f(fieldName, mixedInts.values())));
  assertNotNull(d);
  for (Object o : d.getFieldValues(fieldName)) {
    assertTrue(o instanceof Integer);
    mixedInts.remove(o);
  }
  assertTrue(mixedInts.isEmpty());

  Map<Long,Object> mixedLongs = new LinkedHashMap<Long,Object>(); // preserve order
  mixedLongs.put(85L, "85");
  mixedLongs.put(42944233L, 42944233L); // Long-typed field value
  mixedLongs.put(2894518L, "2,894,518");
  mixedLongs.put(48794721937L, "48,794,721,937");
  d = processAdd(chain, doc(f("id", "3393"), f(fieldName, mixedLongs.values())));
  assertNotNull(d);
  for (Object o : d.getFieldValues(fieldName)) {
    assertTrue(o instanceof Long);
    mixedLongs.remove(o);
  }
  assertTrue(mixedLongs.isEmpty());

  // The inputs are held in a Set, not a Boolean-keyed Map: a map has only two possible keys,
  // so a later put() would overwrite the Boolean-typed entry and it would never be submitted.
  Set<Object> mixedBooleans = new HashSet<Object>();
  mixedBooleans.add("true");
  mixedBooleans.add(Boolean.FALSE); // Boolean-typed field value
  mixedBooleans.add("false");
  d = processAdd(chain, doc(f("id", "3394"), f(fieldName, mixedBooleans)));
  assertNotNull(d);
  int trueCount = 0;
  int falseCount = 0;
  for (Object o : d.getFieldValues(fieldName)) {
    assertTrue(o instanceof Boolean);
    if ((Boolean) o) {
      trueCount++;
    } else {
      falseCount++;
    }
  }
  // "true" yields one true; Boolean.FALSE and "false" yield two falses.
  assertTrue("expected exactly one true value but found " + trueCount, 1 == trueCount);
  assertTrue("expected exactly two false values but found " + falseCount, 2 == falseCount);

  // Date strings plus one value that is already a Date; the formatter from above is reused.
  Map<Date,Object> mixedDates = new HashMap<Date,Object>();
  dateStrings = new String[] { "2020-05-13T18:47", "1989-12-14", "1682-07-22T18:33:00.000Z" };
  for (String dateString : dateStrings) {
    mixedDates.put(dateTimeFormatter.parseDateTime(dateString).toDate(), dateString);
  }
  Date extraDate = dateTimeFormatter.parseDateTime("2003-04-24").toDate();
  mixedDates.put(extraDate, extraDate); // Date-typed field value
  d = processAdd(chain, doc(f("id", "3395"), f(fieldName, mixedDates.values())));
  assertNotNull(d);
  for (Object o : d.getFieldValues(fieldName)) {
    assertTrue(o instanceof Date);
    mixedDates.remove(o);
  }
  assertTrue(mixedDates.isEmpty());
}
}

View File

@ -0,0 +1 @@
a5f29a7acaddea3f4af307e8cf2d0cc82645fd7d

View File

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,5 @@
=============================================================================
= NOTICE file corresponding to section 4d of the Apache License Version 2.0 =
=============================================================================
This product includes software developed by
Joda.org (http://www.joda.org/).