From d6d65851cf4a7d1f9e5f0b58544374785e7c2594 Mon Sep 17 00:00:00 2001 From: Steven Rowe Date: Thu, 27 Jun 2013 00:44:27 +0000 Subject: [PATCH] SOLR-4892: Add field update processors to parse/convert String-typed fields to Date, Number, and Boolean git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1497165 13f79535-47bb-0310-9956-ffa450edef68 --- dev-tools/maven/pom.xml.template | 5 + .../maven/solr/core/src/java/pom.xml.template | 4 + solr/CHANGES.txt | 12 + solr/core/ivy.xml | 1 + .../org/apache/solr/schema/DateField.java | 2 +- .../solr/schema/DateValueFieldType.java | 24 + .../org/apache/solr/schema/DoubleField.java | 2 +- .../solr/schema/DoubleValueFieldType.java | 24 + .../org/apache/solr/schema/FloatField.java | 2 +- .../solr/schema/FloatValueFieldType.java | 24 + .../java/org/apache/solr/schema/IntField.java | 2 +- .../apache/solr/schema/IntValueFieldType.java | 25 + .../org/apache/solr/schema/LongField.java | 2 +- .../solr/schema/LongValueFieldType.java | 24 + .../solr/schema/NumericValueFieldType.java | 24 + .../solr/schema/SortableDoubleField.java | 2 +- .../solr/schema/SortableFloatField.java | 2 +- .../apache/solr/schema/SortableIntField.java | 2 +- .../org/apache/solr/schema/TrieDateField.java | 2 +- .../apache/solr/schema/TrieDoubleField.java | 2 +- .../apache/solr/schema/TrieFloatField.java | 2 +- .../org/apache/solr/schema/TrieIntField.java | 2 +- .../org/apache/solr/schema/TrieLongField.java | 2 +- ...uesOrNoneFieldMutatingUpdateProcessor.java | 116 +++ .../FieldMutatingUpdateProcessor.java | 4 +- .../FieldMutatingUpdateProcessorFactory.java | 1 - ...rseBooleanFieldUpdateProcessorFactory.java | 157 +++ .../ParseDateFieldUpdateProcessorFactory.java | 179 ++++ ...arseDoubleFieldUpdateProcessorFactory.java | 122 +++ ...ParseFloatFieldUpdateProcessorFactory.java | 123 +++ .../ParseIntFieldUpdateProcessorFactory.java | 124 +++ .../ParseLongFieldUpdateProcessorFactory.java | 118 +++ ...rseNumericFieldUpdateProcessorFactory.java | 83 ++ 
...config-parsing-update-processor-chains.xml | 230 +++++ .../ParsingFieldUpdateProcessorsTest.java | 910 ++++++++++++++++++ solr/licenses/joda-time-2.2.jar.sha1 | 1 + solr/licenses/joda-time-LICENSE-ASL.txt | 202 ++++ solr/licenses/joda-time-NOTICE.txt | 5 + 38 files changed, 2552 insertions(+), 16 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/schema/DateValueFieldType.java create mode 100644 solr/core/src/java/org/apache/solr/schema/DoubleValueFieldType.java create mode 100644 solr/core/src/java/org/apache/solr/schema/FloatValueFieldType.java create mode 100644 solr/core/src/java/org/apache/solr/schema/IntValueFieldType.java create mode 100644 solr/core/src/java/org/apache/solr/schema/LongValueFieldType.java create mode 100644 solr/core/src/java/org/apache/solr/schema/NumericValueFieldType.java create mode 100644 solr/core/src/java/org/apache/solr/update/processor/AllValuesOrNoneFieldMutatingUpdateProcessor.java create mode 100644 solr/core/src/java/org/apache/solr/update/processor/ParseBooleanFieldUpdateProcessorFactory.java create mode 100644 solr/core/src/java/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.java create mode 100644 solr/core/src/java/org/apache/solr/update/processor/ParseDoubleFieldUpdateProcessorFactory.java create mode 100644 solr/core/src/java/org/apache/solr/update/processor/ParseFloatFieldUpdateProcessorFactory.java create mode 100644 solr/core/src/java/org/apache/solr/update/processor/ParseIntFieldUpdateProcessorFactory.java create mode 100644 solr/core/src/java/org/apache/solr/update/processor/ParseLongFieldUpdateProcessorFactory.java create mode 100644 solr/core/src/java/org/apache/solr/update/processor/ParseNumericFieldUpdateProcessorFactory.java create mode 100644 solr/core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml create mode 100644 solr/core/src/test/org/apache/solr/update/processor/ParsingFieldUpdateProcessorsTest.java create mode 100644 
solr/licenses/joda-time-2.2.jar.sha1 create mode 100644 solr/licenses/joda-time-LICENSE-ASL.txt create mode 100644 solr/licenses/joda-time-NOTICE.txt diff --git a/dev-tools/maven/pom.xml.template b/dev-tools/maven/pom.xml.template index 57ac6da0e77..fb32c87ca05 100644 --- a/dev-tools/maven/pom.xml.template +++ b/dev-tools/maven/pom.xml.template @@ -182,6 +182,11 @@ commons-io ${commons-io.version} + + joda-time + joda-time + 2.2 + org.apache.httpcomponents httpclient diff --git a/dev-tools/maven/solr/core/src/java/pom.xml.template b/dev-tools/maven/solr/core/src/java/pom.xml.template index 7255b439358..d22f524c034 100644 --- a/dev-tools/maven/solr/core/src/java/pom.xml.template +++ b/dev-tools/maven/solr/core/src/java/pom.xml.template @@ -136,6 +136,10 @@ commons-fileupload commons-fileupload + + joda-time + joda-time + org.restlet.jee org.restlet diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index ae64994a227..190b25b5622 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -120,6 +120,18 @@ New Features * SOLR-4916: Add support to write and read Solr index files and transaction log files to and from HDFS. (phunt, Mark Miller, Greg Chanan) + +* SOLR-4892: Add FieldMutatingUpdateProcessorFactory subclasses + Parse{Date,Integer,Long,Float,Double,Boolean}UpdateProcessorFactory. These + factories have a default selector that matches all fields that either don’t + match any schema field, or are in the schema with the corresponding + typeClass. If they see a value that is not a CharSequence, or can't parse + the value, they leave it as is. For multi-valued fields, these processors + will not convert any values unless all are first successfully parsed, or + already are instances of the target class. Ordering the processors, e.g. + [Boolean, Long, Double, Date] will allow e.g. values ["2", "5", "8.6"] to + be left alone by the Boolean and Long processors, but then converted by the + Double processor. 
(Steve Rowe, hossman) Bug Fixes ---------------------- diff --git a/solr/core/ivy.xml b/solr/core/ivy.xml index d028f35801d..b1d974b2960 100644 --- a/solr/core/ivy.xml +++ b/solr/core/ivy.xml @@ -35,6 +35,7 @@ + diff --git a/solr/core/src/java/org/apache/solr/schema/DateField.java b/solr/core/src/java/org/apache/solr/schema/DateField.java index 1001b350e45..faf9c4ff93f 100644 --- a/solr/core/src/java/org/apache/solr/schema/DateField.java +++ b/solr/core/src/java/org/apache/solr/schema/DateField.java @@ -111,7 +111,7 @@ import java.util.*; * @see XML schema part 2 * @deprecated {@link TrieDateField} is recomended for all new schemas */ -public class DateField extends PrimitiveFieldType { +public class DateField extends PrimitiveFieldType implements DateValueFieldType { public static TimeZone UTC = TimeZone.getTimeZone("UTC"); diff --git a/solr/core/src/java/org/apache/solr/schema/DateValueFieldType.java b/solr/core/src/java/org/apache/solr/schema/DateValueFieldType.java new file mode 100644 index 00000000000..c4e7984ec9c --- /dev/null +++ b/solr/core/src/java/org/apache/solr/schema/DateValueFieldType.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.schema; + +/** + * Marker interface for Date-valued field types. + */ +public interface DateValueFieldType { +} diff --git a/solr/core/src/java/org/apache/solr/schema/DoubleField.java b/solr/core/src/java/org/apache/solr/schema/DoubleField.java index c76b99c2592..550adae0a91 100644 --- a/solr/core/src/java/org/apache/solr/schema/DoubleField.java +++ b/solr/core/src/java/org/apache/solr/schema/DoubleField.java @@ -45,7 +45,7 @@ import org.apache.solr.search.QParser; * * @see TrieDoubleField */ -public class DoubleField extends PrimitiveFieldType { +public class DoubleField extends PrimitiveFieldType implements DoubleValueFieldType { private static final FieldCache.DoubleParser PARSER = new FieldCache.DoubleParser() { diff --git a/solr/core/src/java/org/apache/solr/schema/DoubleValueFieldType.java b/solr/core/src/java/org/apache/solr/schema/DoubleValueFieldType.java new file mode 100644 index 00000000000..ff9712e22db --- /dev/null +++ b/solr/core/src/java/org/apache/solr/schema/DoubleValueFieldType.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.schema; + +/** + * Marker interface for double-valued field types. 
+ */ +public interface DoubleValueFieldType extends NumericValueFieldType { +} diff --git a/solr/core/src/java/org/apache/solr/schema/FloatField.java b/solr/core/src/java/org/apache/solr/schema/FloatField.java index 2c1ed4dbb44..7e23443852d 100644 --- a/solr/core/src/java/org/apache/solr/schema/FloatField.java +++ b/solr/core/src/java/org/apache/solr/schema/FloatField.java @@ -46,7 +46,7 @@ import java.io.IOException; * * @see TrieFloatField */ -public class FloatField extends PrimitiveFieldType { +public class FloatField extends PrimitiveFieldType implements FloatValueFieldType { private static final FieldCache.FloatParser PARSER = new FieldCache.FloatParser() { diff --git a/solr/core/src/java/org/apache/solr/schema/FloatValueFieldType.java b/solr/core/src/java/org/apache/solr/schema/FloatValueFieldType.java new file mode 100644 index 00000000000..5606caf9053 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/schema/FloatValueFieldType.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.schema; + +/** + * Marker interface for float-valued field types. 
+ */ +public interface FloatValueFieldType extends NumericValueFieldType { +} diff --git a/solr/core/src/java/org/apache/solr/schema/IntField.java b/solr/core/src/java/org/apache/solr/schema/IntField.java index 85e3c3b3b7d..2b14867b8aa 100644 --- a/solr/core/src/java/org/apache/solr/schema/IntField.java +++ b/solr/core/src/java/org/apache/solr/schema/IntField.java @@ -46,7 +46,7 @@ import java.io.IOException; * * @see TrieIntField */ -public class IntField extends PrimitiveFieldType { +public class IntField extends PrimitiveFieldType implements IntValueFieldType { private static final FieldCache.IntParser PARSER = new FieldCache.IntParser() { diff --git a/solr/core/src/java/org/apache/solr/schema/IntValueFieldType.java b/solr/core/src/java/org/apache/solr/schema/IntValueFieldType.java new file mode 100644 index 00000000000..9cf81e17be5 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/schema/IntValueFieldType.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.schema; + +/** + * Marker interface for int-valued field types. 
+ */ +public interface IntValueFieldType extends NumericValueFieldType { +} + diff --git a/solr/core/src/java/org/apache/solr/schema/LongField.java b/solr/core/src/java/org/apache/solr/schema/LongField.java index 93f3389c2b1..5b18db8119e 100644 --- a/solr/core/src/java/org/apache/solr/schema/LongField.java +++ b/solr/core/src/java/org/apache/solr/schema/LongField.java @@ -46,7 +46,7 @@ import java.util.Map; * * @see TrieLongField */ -public class LongField extends PrimitiveFieldType { +public class LongField extends PrimitiveFieldType implements LongValueFieldType { private static final FieldCache.LongParser PARSER = new FieldCache.LongParser() { diff --git a/solr/core/src/java/org/apache/solr/schema/LongValueFieldType.java b/solr/core/src/java/org/apache/solr/schema/LongValueFieldType.java new file mode 100644 index 00000000000..55b8b5113da --- /dev/null +++ b/solr/core/src/java/org/apache/solr/schema/LongValueFieldType.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.schema; + +/** + * Marker interface for long-valued field types. 
+ */ +public interface LongValueFieldType extends NumericValueFieldType { +} diff --git a/solr/core/src/java/org/apache/solr/schema/NumericValueFieldType.java b/solr/core/src/java/org/apache/solr/schema/NumericValueFieldType.java new file mode 100644 index 00000000000..e2238299f02 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/schema/NumericValueFieldType.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.schema; + +/** + * Marker interface for numeric-valued field types. 
+ */ +public interface NumericValueFieldType { +} diff --git a/solr/core/src/java/org/apache/solr/schema/SortableDoubleField.java b/solr/core/src/java/org/apache/solr/schema/SortableDoubleField.java index 65436f4bd6f..2f4b0a65dda 100644 --- a/solr/core/src/java/org/apache/solr/schema/SortableDoubleField.java +++ b/solr/core/src/java/org/apache/solr/schema/SortableDoubleField.java @@ -54,7 +54,7 @@ import java.io.IOException; * @deprecated use {@link DoubleField} or {@link TrieDoubleField} - will be removed in 5.x */ @Deprecated -public class SortableDoubleField extends PrimitiveFieldType { +public class SortableDoubleField extends PrimitiveFieldType implements DoubleValueFieldType { @Override public SortField getSortField(SchemaField field,boolean reverse) { return getStringSort(field,reverse); diff --git a/solr/core/src/java/org/apache/solr/schema/SortableFloatField.java b/solr/core/src/java/org/apache/solr/schema/SortableFloatField.java index 69db7616b2f..e66e25563e7 100644 --- a/solr/core/src/java/org/apache/solr/schema/SortableFloatField.java +++ b/solr/core/src/java/org/apache/solr/schema/SortableFloatField.java @@ -55,7 +55,7 @@ import java.io.IOException; * @deprecated use {@link FloatField} or {@link TrieFloatField} - will be removed in 5.x */ @Deprecated -public class SortableFloatField extends PrimitiveFieldType { +public class SortableFloatField extends PrimitiveFieldType implements FloatValueFieldType { @Override public SortField getSortField(SchemaField field,boolean reverse) { return getStringSort(field,reverse); diff --git a/solr/core/src/java/org/apache/solr/schema/SortableIntField.java b/solr/core/src/java/org/apache/solr/schema/SortableIntField.java index cbcb913ea84..955857370f9 100644 --- a/solr/core/src/java/org/apache/solr/schema/SortableIntField.java +++ b/solr/core/src/java/org/apache/solr/schema/SortableIntField.java @@ -55,7 +55,7 @@ import java.io.IOException; * @deprecated use {@link IntField} or {@link TrieIntField} - will be removed in 
5.x */ @Deprecated -public class SortableIntField extends PrimitiveFieldType { +public class SortableIntField extends PrimitiveFieldType implements IntValueFieldType { @Override public SortField getSortField(SchemaField field,boolean reverse) { return getStringSort(field,reverse); diff --git a/solr/core/src/java/org/apache/solr/schema/TrieDateField.java b/solr/core/src/java/org/apache/solr/schema/TrieDateField.java index 9012e54944b..0a652efb44c 100755 --- a/solr/core/src/java/org/apache/solr/schema/TrieDateField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieDateField.java @@ -55,7 +55,7 @@ import java.io.IOException; * @see DateField * @see TrieField */ -public class TrieDateField extends DateField { +public class TrieDateField extends DateField implements DateValueFieldType { final TrieField wrappedField = new TrieField() {{ type = TrieTypes.DATE; diff --git a/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java b/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java index a8884b4d22d..1f0da8aaaa8 100755 --- a/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java @@ -33,7 +33,7 @@ package org.apache.solr.schema; * @see Double * @see Java Language Specification, s4.2.3 */ -public class TrieDoubleField extends TrieField { +public class TrieDoubleField extends TrieField implements DoubleValueFieldType { { type=TrieTypes.DOUBLE; } diff --git a/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java b/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java index 2ea1d142940..1163d7285f1 100755 --- a/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java @@ -33,7 +33,7 @@ package org.apache.solr.schema; * @see Float * @see Java Language Specification, s4.2.3 */ -public class TrieFloatField extends TrieField { +public class TrieFloatField extends TrieField implements 
FloatValueFieldType { { type=TrieTypes.FLOAT; } diff --git a/solr/core/src/java/org/apache/solr/schema/TrieIntField.java b/solr/core/src/java/org/apache/solr/schema/TrieIntField.java index 4cc29990a57..e49f59a7fe6 100755 --- a/solr/core/src/java/org/apache/solr/schema/TrieIntField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieIntField.java @@ -27,7 +27,7 @@ package org.apache.solr.schema; * * @see Integer */ -public class TrieIntField extends TrieField { +public class TrieIntField extends TrieField implements IntValueFieldType { { type=TrieTypes.INTEGER; } diff --git a/solr/core/src/java/org/apache/solr/schema/TrieLongField.java b/solr/core/src/java/org/apache/solr/schema/TrieLongField.java index c20f252dcac..052e4a72c86 100755 --- a/solr/core/src/java/org/apache/solr/schema/TrieLongField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieLongField.java @@ -27,7 +27,7 @@ package org.apache.solr.schema; * * @see Long */ -public class TrieLongField extends TrieField { +public class TrieLongField extends TrieField implements LongValueFieldType { { type=TrieTypes.LONG; } diff --git a/solr/core/src/java/org/apache/solr/update/processor/AllValuesOrNoneFieldMutatingUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/AllValuesOrNoneFieldMutatingUpdateProcessor.java new file mode 100644 index 00000000000..58969b079ea --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/AllValuesOrNoneFieldMutatingUpdateProcessor.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.common.SolrInputField; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; + +/** + * Abstract subclass of FieldMutatingUpdateProcessor for implementing + * UpdateProcessors that will mutate all individual values of a selected + * field independently. If not all individual values are acceptable + * - i.e., mutateValue(srcVal) returns {@link #SKIP_FIELD_VALUE_LIST_SINGLETON} + * for at least one value - then none of the values are mutated: + * mutate(srcField) will return srcField. 
+ * + * @see FieldMutatingUpdateProcessorFactory + * @see FieldValueMutatingUpdateProcessor + */ +public abstract class AllValuesOrNoneFieldMutatingUpdateProcessor extends FieldMutatingUpdateProcessor { + + private static final Logger log = LoggerFactory.getLogger(AllValuesOrNoneFieldMutatingUpdateProcessor.class); + + public static final Object DELETE_VALUE_SINGLETON = new Object() { + @Override + public String toString() { + return "!!Singleton Object Triggering Value Deletion!!"; + } + }; + + public static final Object SKIP_FIELD_VALUE_LIST_SINGLETON= new Object() { + @Override + public String toString() { + return "!!Singleton Object Triggering Skipping Field Mutation!!"; + } + }; + + + public AllValuesOrNoneFieldMutatingUpdateProcessor(FieldNameSelector selector, UpdateRequestProcessor next) { + super(selector, next); + } + + /** + * Mutates individual values of a field as needed, or returns the original + * value. + * + * @param srcVal a value from a matched field which should be mutated + * @return the value to use as a replacement for src, or + * DELETE_VALUE_SINGLETON to indicate that the value + * should be removed completely, or + * SKIP_FIELD_VALUE_LIST_SINGLETON to indicate that + * a field value is not consistent with + * @see #DELETE_VALUE_SINGLETON + * @see #SKIP_FIELD_VALUE_LIST_SINGLETON + */ + protected abstract Object mutateValue(final Object srcVal); + + protected final SolrInputField mutate(final SolrInputField srcField) { + List messages = null; + SolrInputField result = new SolrInputField(srcField.getName()); + for (final Object srcVal : srcField.getValues()) { + final Object destVal = mutateValue(srcVal); + if (SKIP_FIELD_VALUE_LIST_SINGLETON == destVal) { + log.debug("field '{}' {} value '{}' is not mutatable, so no values will be mutated", + new Object[] { srcField.getName(), srcVal.getClass().getSimpleName(), srcVal }); + return srcField; + } + if (DELETE_VALUE_SINGLETON == destVal) { + if (log.isDebugEnabled()) { + if (null == messages) 
{ + messages = new ArrayList(); + } + messages.add(String.format(Locale.ROOT, "removing value from field '%s': %s '%s'", + srcField.getName(), srcVal.getClass().getSimpleName(), srcVal)); + } + } else { + if (log.isDebugEnabled()) { + if (null == messages) { + messages = new ArrayList(); + } + messages.add(String.format(Locale.ROOT, "replace value from field '%s': %s '%s' with %s '%s'", + srcField.getName(), srcVal.getClass().getSimpleName(), srcVal, + destVal.getClass().getSimpleName(), destVal)); + } + result.addValue(destVal, 1.0F); + } + } + result.setBoost(srcField.getBoost()); + + if (null != messages && log.isDebugEnabled()) { + for (String message : messages) { + log.debug(message); + } + } + return 0 == result.getValueCount() ? null : result; + } +} diff --git a/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessor.java index caf326aefbe..92fc82b216f 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessor.java +++ b/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessor.java @@ -108,7 +108,7 @@ public abstract class FieldMutatingUpdateProcessor // for now, don't allow it. if (! 
fname.equals(dest.getName()) ) { throw new SolrException(SERVER_ERROR, - "mutute returned field with different name: " + "mutate returned field with different name: " + fname + " => " + dest.getName()); } doc.put(dest.getName(), dest); @@ -118,7 +118,7 @@ public abstract class FieldMutatingUpdateProcessor } /** - * Interface for idenfifying which fileds should be mutated + * Interface for identifying which fields should be mutated */ public static interface FieldNameSelector { public boolean shouldMutate(final String fieldName); diff --git a/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java index ae11932c1b7..72cb52d2187 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java +++ b/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java @@ -65,7 +65,6 @@ import org.apache.solr.util.plugin.SolrCoreAware; *

*
    *
  • fieldNameMatchesSchemaField - selecting specific fields based on whether or not they match a schema field
  • - *
*

* One or more excludes <lst> params may also be specified, diff --git a/solr/core/src/java/org/apache/solr/update/processor/ParseBooleanFieldUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ParseBooleanFieldUpdateProcessorFactory.java new file mode 100644 index 00000000000..9dc0e382ca9 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/ParseBooleanFieldUpdateProcessorFactory.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.BoolField; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +/** + *

+ * Attempts to mutate selected fields that have only CharSequence-typed values + * into Boolean values. + *

+ *

+ * The default selection behavior is to mutate both those fields that don't match + * a schema field, as well as those fields that do match a schema field and have + * a field type that uses class solr.BoolField. + *

+ *

+ * If all values are parseable as boolean (or are already Boolean), then the field + * will be mutated, replacing each value with its parsed Boolean equivalent; + * otherwise, no mutation will occur. + *

+ *

+ * The default true and false values are "true" and "false", respectively, and match + * case-insensitively. The following configuration changes the acceptable values, and + * requires a case-sensitive match - note that either individual <str> elements + * or <arr>-s of <str> elements may be used to specify the trueValue-s + * and falseValue-s: + *

+ * + *
+ * <processor class="solr.ParseBooleanFieldUpdateProcessorFactory">
+ *   <str name="caseSensitive">true</str>
+ *   <str name="trueValue">True</str>
+ *   <str name="trueValue">Yes</str>
+ *   <arr name="falseValue">
+ *     <str>False</str>
+ *     <str>No</str>
+ *   </arr>
+ * </processor>
+ */ +public class ParseBooleanFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory { + private static final String TRUE_VALUES_PARAM = "trueValue"; + private static final String FALSE_VALUES_PARAM = "falseValue"; + private static final String CASE_SENSITIVE_PARAM = "caseSensitive"; + + private Set trueValues = new HashSet(Arrays.asList(new String[] { "true" })); + private Set falseValues = new HashSet(Arrays.asList(new String[] { "false" })); + private boolean caseSensitive = false; + + @Override + public UpdateRequestProcessor getInstance(SolrQueryRequest req, + SolrQueryResponse rsp, + UpdateRequestProcessor next) { + return new AllValuesOrNoneFieldMutatingUpdateProcessor(getSelector(), next) { + @Override + protected Object mutateValue(Object srcVal) { + if (srcVal instanceof CharSequence) { + String stringVal = caseSensitive ? srcVal.toString() : srcVal.toString().toLowerCase(Locale.ROOT); + if (trueValues.contains(stringVal)) { + return true; + } else if (falseValues.contains(stringVal)) { + return false; + } else { + return SKIP_FIELD_VALUE_LIST_SINGLETON; + } + } + if (srcVal instanceof Boolean) { + return srcVal; + } + return SKIP_FIELD_VALUE_LIST_SINGLETON; + } + }; + } + + @Override + public void init(NamedList args) { + Object caseSensitiveParam = args.remove(CASE_SENSITIVE_PARAM); + if (null != caseSensitiveParam) { + if (caseSensitiveParam instanceof Boolean) { + caseSensitive = (Boolean)caseSensitiveParam; + } else { + caseSensitive = Boolean.valueOf(caseSensitiveParam.toString()); + } + } + + Collection trueValuesParam = oneOrMany(args, TRUE_VALUES_PARAM); + if ( ! trueValuesParam.isEmpty()) { + trueValues.clear(); + for (String trueVal : trueValuesParam) { + trueValues.add(caseSensitive ? trueVal : trueVal.toLowerCase(Locale.ROOT)); + } + } + + Collection falseValuesParam = oneOrMany(args, FALSE_VALUES_PARAM); + if ( ! 
falseValuesParam.isEmpty()) { + falseValues.clear(); + for (String val : falseValuesParam) { + final String falseVal = caseSensitive ? val : val.toLowerCase(Locale.ROOT); + if (trueValues.contains(falseVal)) { + throw new SolrException(ErrorCode.SERVER_ERROR, + "Param '" + FALSE_VALUES_PARAM + "' contains a value also in param '" + TRUE_VALUES_PARAM + + "': '" + val + "'"); + } + falseValues.add(falseVal); + } + } + super.init(args); + } + + + /** + * Returns true if the field doesn't match any schema field or dynamic field, + * or if the matched field's type is BoolField + */ + @Override + public FieldMutatingUpdateProcessor.FieldNameSelector + getDefaultSelector(final SolrCore core) { + + return new FieldMutatingUpdateProcessor.FieldNameSelector() { + @Override + public boolean shouldMutate(final String fieldName) { + final IndexSchema schema = core.getLatestSchema(); + FieldType type = schema.getFieldTypeNoEx(fieldName); + return (null == type) || (type instanceof BoolField); + } + }; + } +} diff --git a/solr/core/src/java/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.java new file mode 100644 index 00000000000..05aecbf7935 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.java @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.commons.lang.LocaleUtils; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.DateValueFieldType; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Date; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +/** + *

+ * Attempts to mutate selected fields that have only CharSequence-typed values + * into Date values. Solr will continue to index date/times in the UTC time + * zone, but the input date/times may be expressed using other time zones, + * and will be converted to UTC when they are mutated. + *

+ *

+ * The default selection behavior is to mutate both those fields that don't match + * a schema field, as well as those fields that match a schema field with a field + * type that uses class solr.DateField or a sub-class, including solr.TrieDateField. + *

+ *

+ * If all values are parseable as dates (or are already Date), then the field will + * be mutated, replacing each value with its parsed Date equivalent; otherwise, no + * mutation will occur. + *

+ *

+ * One or more date "format" specifiers must be specified. See + * Joda-time's DateTimeFormat javadocs for a description of format strings. + *

+ *

+ * A default time zone name or offset may optionally be specified for those dates + * that don't include an explicit zone/offset. NOTE: three-letter zone + * designations like "EST" are not parseable (with the single exception of "UTC"), + * because they are ambiguous. If no default time zone is specified, UTC will be + * used. See Wikipedia's list of TZ database time zone names. + *

+ *

+ * The locale to use when parsing field values using the specified formats may + * optionally be specified. If no locale is configured, then {@link Locale#ROOT} + * will be used. The following configuration specifies the French/France locale and + * two date formats that will parse the strings "le mardi 8 janvier 2013" and + * "le 28 déc. 2010 à 15 h 30", respectively. Note that either individual <str> + * elements or <arr>-s of <str> elements may be used to specify the + * date format(s): + *

+ * + *
+ * <processor class="solr.ParseDateFieldUpdateProcessorFactory">
+ *   <str name="defaultTimeZone">Europe/Paris</str>
+ *   <str name="locale">fr_FR</str>
+ *   <arr name="format">
+ *     <str>'le' EEEE dd MMMM yyyy</str>
+ *     <str>'le' dd MMM. yyyy 'à' HH 'h' mm</str>
+ *   </arr>
+ * </processor>
+ * + *

+ * See {@link Locale} for a description of acceptable language, country (optional) + * and variant (optional) values, joined with underscore(s). + *

+ */ +public class ParseDateFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory { + public static final Logger log = LoggerFactory.getLogger(ParseDateFieldUpdateProcessorFactory.class); + + private static final String FORMATS_PARAM = "format"; + private static final String DEFAULT_TIME_ZONE_PARAM = "defaultTimeZone"; + private static final String LOCALE_PARAM = "locale"; + + private Map formats = new LinkedHashMap(); + + @Override + public UpdateRequestProcessor getInstance(SolrQueryRequest req, + SolrQueryResponse rsp, + UpdateRequestProcessor next) { + return new AllValuesOrNoneFieldMutatingUpdateProcessor(getSelector(), next) { + @Override + protected Object mutateValue(Object srcVal) { + if (srcVal instanceof CharSequence) { + String srcStringVal = srcVal.toString(); + for (Map.Entry format : formats.entrySet()) { + DateTimeFormatter parser = format.getValue(); + try { + DateTime dateTime = parser.parseDateTime(srcStringVal); + return dateTime.withZone(DateTimeZone.UTC).toDate(); + } catch (IllegalArgumentException e) { + log.debug("value '{}' is not parseable with format '{}'", + new Object[] { srcStringVal, format.getKey() }); + } + } + log.debug("value '{}' was not parsed by any configured format, thus was not mutated", srcStringVal); + return SKIP_FIELD_VALUE_LIST_SINGLETON; + } + if (srcVal instanceof Date) { + return srcVal; + } + return SKIP_FIELD_VALUE_LIST_SINGLETON; + } + }; + } + + @Override + public void init(NamedList args) { + + Locale locale = Locale.ROOT; + + String localeParam = (String)args.remove(LOCALE_PARAM); + if (null != localeParam) { + locale = LocaleUtils.toLocale(localeParam); + } + + Object defaultTimeZoneParam = args.remove(DEFAULT_TIME_ZONE_PARAM); + DateTimeZone defaultTimeZone = DateTimeZone.UTC; + if (null != defaultTimeZoneParam) { + defaultTimeZone = DateTimeZone.forID(defaultTimeZoneParam.toString()); + } + + Collection formatsParam = oneOrMany(args, FORMATS_PARAM); + if (null != formatsParam) { + for 
(String value : formatsParam) { + formats.put(value, DateTimeFormat.forPattern(value).withZone(defaultTimeZone).withLocale(locale)); + } + } + super.init(args); + } + + /** + * Returns true if the field doesn't match any schema field or dynamic field, + * or if the matched field's type is a DateValueFieldType + */ + @Override + public FieldMutatingUpdateProcessor.FieldNameSelector + getDefaultSelector(final SolrCore core) { + + return new FieldMutatingUpdateProcessor.FieldNameSelector() { + @Override + public boolean shouldMutate(final String fieldName) { + final IndexSchema schema = core.getLatestSchema(); + FieldType type = schema.getFieldTypeNoEx(fieldName); + return (null == type) || type instanceof DateValueFieldType; + } + }; + } +} diff --git a/solr/core/src/java/org/apache/solr/update/processor/ParseDoubleFieldUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ParseDoubleFieldUpdateProcessorFactory.java new file mode 100644 index 00000000000..07c984caf73 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/ParseDoubleFieldUpdateProcessorFactory.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.update.processor; + +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.DoubleValueFieldType; +import org.apache.solr.schema.FieldType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.math.RoundingMode; +import java.text.NumberFormat; +import java.text.ParsePosition; +import java.util.Locale; + +/** + *

+ * Attempts to mutate selected fields that have only CharSequence-typed values + * into Double values. If required, rounding uses ceiling mode: + * {@link RoundingMode#CEILING}. Grouping separators (',' in the ROOT locale) + * are parsed. + *

+ *

+ * The default selection behavior is to mutate both those fields that don't match + * a schema field, as well as those fields that match a schema field with a field + * type that uses class solr.DoubleField, solr.TrieDoubleField, or + * solr.SortableDoubleField. + *

+ *

+ * If all values are parseable as double (or are already Double), then the field + * will be mutated, replacing each value with its parsed Double equivalent; + * otherwise, no mutation will occur. + *

+ *

+ * The locale to use when parsing field values, which will affect the recognized + * grouping separator and decimal characters, may optionally be specified. If + * no locale is configured, then {@link Locale#ROOT} will be used. The following + * configuration specifies the Russian/Russia locale, which will parse the string + * "12 345,899" as double value 12345.899 (the grouping separator + * character is U+00A0 NO-BREAK SPACE). + *

+ * + *
+ * <processor class="solr.ParseDoubleFieldUpdateProcessorFactory">
+ *   <str name="locale">ru_RU</str>
+ * </processor>
+ * + *

+ * See {@link Locale} for a description of acceptable language, country (optional) + * and variant (optional) values, joined with underscore(s). + *

+ */ +public class ParseDoubleFieldUpdateProcessorFactory extends ParseNumericFieldUpdateProcessorFactory { + + private static final Logger log = LoggerFactory.getLogger(ParseDoubleFieldUpdateProcessorFactory.class); + + @Override + public UpdateRequestProcessor getInstance(SolrQueryRequest req, + SolrQueryResponse rsp, + UpdateRequestProcessor next) { + return new ParseDoubleFieldUpdateProcessor(getSelector(), locale, next); + } + + private static final class ParseDoubleFieldUpdateProcessor extends AllValuesOrNoneFieldMutatingUpdateProcessor { + private final Locale locale; + // NumberFormat instances are not thread safe + private final ThreadLocal numberFormat = new ThreadLocal() { + @Override + protected NumberFormat initialValue() { + NumberFormat format = NumberFormat.getInstance(locale); + format.setParseIntegerOnly(false); + format.setRoundingMode(RoundingMode.CEILING); + return format; + } + }; + + ParseDoubleFieldUpdateProcessor(FieldNameSelector selector, Locale locale, UpdateRequestProcessor next) { + super(selector, next); + this.locale = locale; + } + + @Override + protected Object mutateValue(Object srcVal) { + if (srcVal instanceof CharSequence) { + String stringVal = srcVal.toString(); + ParsePosition pos = new ParsePosition(0); + Number number = numberFormat.get().parse(stringVal, pos); + if (pos.getIndex() != stringVal.length()) { + log.debug("value '{}' is not parseable, thus not mutated; unparsed chars: '{}'", + new Object[] { srcVal, stringVal.substring(pos.getIndex())}); + return SKIP_FIELD_VALUE_LIST_SINGLETON; + } + return number.doubleValue(); + } + if (srcVal instanceof Double) { + return srcVal; + } + return SKIP_FIELD_VALUE_LIST_SINGLETON; + } + } + + @Override + protected boolean isSchemaFieldTypeCompatible(FieldType type) { + return type instanceof DoubleValueFieldType; + } +} diff --git a/solr/core/src/java/org/apache/solr/update/processor/ParseFloatFieldUpdateProcessorFactory.java 
b/solr/core/src/java/org/apache/solr/update/processor/ParseFloatFieldUpdateProcessorFactory.java new file mode 100644 index 00000000000..b085b8067c5 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/ParseFloatFieldUpdateProcessorFactory.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.FloatValueFieldType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.math.RoundingMode; +import java.text.NumberFormat; +import java.text.ParsePosition; +import java.util.Locale; + +/** + *

+ * Attempts to mutate selected fields that have only CharSequence-typed values + * into Float values. If required, rounding uses ceiling mode: + * {@link RoundingMode#CEILING}. Grouping separators (',' in the ROOT locale) + * are parsed. + *

+ *

+ * The default selection behavior is to mutate both those fields that don't match + * a schema field, as well as those fields that match a schema field with a field + * type that uses class solr.FloatField, solr.TrieFloatField, or + * solr.SortableFloatField. + *

+ *

+ * If all values are parseable as float (or are already Float), then the field + * will be mutated, replacing each value with its parsed Float equivalent; + * otherwise, no mutation will occur. + *

+ *

+ * The locale to use when parsing field values, which will affect the recognized + * grouping separator and decimal characters, may optionally be specified. If + * no locale is configured, then {@link Locale#ROOT} will be used. The following + * configuration specifies the Russian/Russia locale, which will parse the string + * "12 345,899" as 12345.899f (the grouping separator character is U+00A0 NO-BREAK + * SPACE). + *

+ * + *
+ * <processor class="solr.ParseFloatFieldUpdateProcessorFactory">
+ *   <str name="locale">ru_RU</str>
+ * </processor>
+ * + *

+ * See {@link Locale} for a description of acceptable language, country (optional) + * and variant (optional) values, joined with underscore(s). + *

+ */ +public class ParseFloatFieldUpdateProcessorFactory extends ParseNumericFieldUpdateProcessorFactory { + + private static final Logger log = LoggerFactory.getLogger(ParseFloatFieldUpdateProcessorFactory.class); + + @Override + public UpdateRequestProcessor getInstance(SolrQueryRequest req, + SolrQueryResponse rsp, + UpdateRequestProcessor next) { + return new ParseFloatFieldUpdateProcessor(getSelector(), locale, next); + } + + private static class ParseFloatFieldUpdateProcessor extends AllValuesOrNoneFieldMutatingUpdateProcessor { + private final Locale locale; + + // NumberFormat instances are not thread safe + private final ThreadLocal numberFormat = new ThreadLocal() { + @Override + protected NumberFormat initialValue() { + NumberFormat format = NumberFormat.getInstance(locale); + format.setParseIntegerOnly(false); + format.setRoundingMode(RoundingMode.CEILING); + return format; + } + }; + + ParseFloatFieldUpdateProcessor(FieldNameSelector selector, Locale locale, UpdateRequestProcessor next) { + super(selector, next); + this.locale = locale; + } + + @Override + protected Object mutateValue(Object srcVal) { + if (srcVal instanceof CharSequence) { + String stringVal = srcVal.toString(); + ParsePosition pos = new ParsePosition(0); + Number number = numberFormat.get().parse(stringVal, pos); + if (pos.getIndex() != stringVal.length()) { + log.debug("value '{}' is not parseable, thus not mutated; unparsed chars: '{}'", + new Object[] { srcVal, stringVal.substring(pos.getIndex())}); + return SKIP_FIELD_VALUE_LIST_SINGLETON; + } + return number.floatValue(); + } + if (srcVal instanceof Float) { + return srcVal; + } + return SKIP_FIELD_VALUE_LIST_SINGLETON; + } + } + + @Override + protected boolean isSchemaFieldTypeCompatible(FieldType type) { + return type instanceof FloatValueFieldType; + } +} diff --git a/solr/core/src/java/org/apache/solr/update/processor/ParseIntFieldUpdateProcessorFactory.java 
b/solr/core/src/java/org/apache/solr/update/processor/ParseIntFieldUpdateProcessorFactory.java new file mode 100644 index 00000000000..51faad7038c --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/ParseIntFieldUpdateProcessorFactory.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IntValueFieldType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.text.NumberFormat; +import java.text.ParsePosition; +import java.util.Locale; + +/** + *

+ * Attempts to mutate selected fields that have only CharSequence-typed values + * into Integer values. Grouping separators (',' in the ROOT locale) are parsed. + *

+ *

+ * The default selection behavior is to mutate both those fields that don't match + * a schema field, as well as those fields that match a schema field with a field + * type that uses class solr.IntField, solr.TrieIntField, or + * solr.SortableIntField. + *

+ *

+ * If all values are parseable as int (or are already Integer), then the field + * will be mutated, replacing each value with its parsed Integer equivalent; + * otherwise, no mutation will occur. + *

+ *

+ * The locale to use when parsing field values, which will affect the recognized + * grouping separator character, may optionally be specified. If no locale is + * configured, then {@link Locale#ROOT} will be used. The following configuration + * specifies the Russian/Russia locale, which will parse the string "12 345 899" + * as 12345899 (the grouping separator character is U+00A0 NO-BREAK SPACE). + *

+ * + *
+ * <processor class="solr.ParseIntFieldUpdateProcessorFactory">
+ *   <str name="locale">ru_RU</str>
+ * </processor>
+ * + *

+ * See {@link Locale} for a description of acceptable language, country (optional) + * and variant (optional) values, joined with underscore(s). + *

+ */ +public class ParseIntFieldUpdateProcessorFactory extends ParseNumericFieldUpdateProcessorFactory { + + private static final Logger log = LoggerFactory.getLogger(ParseIntFieldUpdateProcessorFactory.class); + + @Override + public UpdateRequestProcessor getInstance(SolrQueryRequest req, + SolrQueryResponse rsp, + UpdateRequestProcessor next) { + return new ParseIntFieldUpdateProcessor(getSelector(), locale, next); + } + + private static final class ParseIntFieldUpdateProcessor extends AllValuesOrNoneFieldMutatingUpdateProcessor { + private final Locale locale; + + // NumberFormat instances are not thread safe + private final ThreadLocal numberFormat = new ThreadLocal() { + @Override + protected NumberFormat initialValue() { + NumberFormat format = NumberFormat.getInstance(locale); + format.setParseIntegerOnly(true); + return format; + } + }; + + ParseIntFieldUpdateProcessor(FieldNameSelector selector, Locale locale, UpdateRequestProcessor next) { + super(selector, next); + this.locale = locale; + } + + @Override + protected Object mutateValue(Object srcVal) { + if (srcVal instanceof CharSequence) { + String stringVal = srcVal.toString(); + ParsePosition pos = new ParsePosition(0); + Number number = numberFormat.get().parse(stringVal, pos); + if (pos.getIndex() != stringVal.length()) { + log.debug("value '{}' is not parseable, thus not mutated; unparsed chars: '{}'", + new Object[] { srcVal, stringVal.substring(pos.getIndex())}); + return SKIP_FIELD_VALUE_LIST_SINGLETON; + } + int intValue = number.intValue(); + if (number.longValue() == (long)intValue) { + // If the high bits don't get truncated by number.intValue() + return intValue; + } + log.debug("value '{}' doesn't fit into an Integer, thus was not mutated", srcVal); + return SKIP_FIELD_VALUE_LIST_SINGLETON; + } + if (srcVal instanceof Integer) { + return srcVal; + } + return SKIP_FIELD_VALUE_LIST_SINGLETON; + } + } + + @Override + protected boolean isSchemaFieldTypeCompatible(FieldType type) { + return 
type instanceof IntValueFieldType; + } +} diff --git a/solr/core/src/java/org/apache/solr/update/processor/ParseLongFieldUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ParseLongFieldUpdateProcessorFactory.java new file mode 100644 index 00000000000..1b0ceb5abce --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/ParseLongFieldUpdateProcessorFactory.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.LongValueFieldType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.text.NumberFormat; +import java.text.ParsePosition; +import java.util.Locale; + +/** + *

+ * Attempts to mutate selected fields that have only CharSequence-typed values + * into Long values. Grouping separators (',' in the ROOT locale) are parsed. + *

+ *

+ * The default selection behavior is to mutate both those fields that don't match + * a schema field, as well as those fields that match a schema field with a field + * type that uses class solr.LongField, solr.TrieLongField, or + * solr.SortableLongField. + *

+ *

+ * If all values are parseable as long (or are already Long), then the field + * will be mutated, replacing each value with its parsed Long equivalent; + * otherwise, no mutation will occur. + *

+ *

+ * The locale to use when parsing field values, which will affect the recognized + * grouping separator character, may optionally be specified. If no locale is + * configured, then {@link Locale#ROOT} will be used. The following configuration + * specifies the Russian/Russia locale, which will parse the string "12 345 899" + * as 12345899L (the grouping separator character is U+00A0 NO-BREAK SPACE). + *

+ * + *
+ * <processor class="solr.ParseLongFieldUpdateProcessorFactory">
+ *   <str name="locale">ru_RU</str>
+ * </processor>
+ * + *

+ * See {@link Locale} for a description of acceptable language, country (optional) + * and variant (optional) values, joined with underscore(s). + *

+ */ +public class ParseLongFieldUpdateProcessorFactory extends ParseNumericFieldUpdateProcessorFactory { + + private static final Logger log = LoggerFactory.getLogger(ParseLongFieldUpdateProcessorFactory.class); + + @Override + public UpdateRequestProcessor getInstance(SolrQueryRequest req, + SolrQueryResponse rsp, + UpdateRequestProcessor next) { + return new ParseLongFieldUpdateProcessor(getSelector(), locale, next); + } + + private static class ParseLongFieldUpdateProcessor extends AllValuesOrNoneFieldMutatingUpdateProcessor { + private final Locale locale; + + // NumberFormat instances are not thread safe + private final ThreadLocal numberFormat = new ThreadLocal() { + @Override + protected NumberFormat initialValue() { + NumberFormat format = NumberFormat.getInstance(locale); + format.setParseIntegerOnly(true); + return format; + } + }; + + ParseLongFieldUpdateProcessor(FieldNameSelector selector, Locale locale, UpdateRequestProcessor next) { + super(selector, next); + this.locale = locale; + } + + @Override + protected Object mutateValue(Object srcVal) { + if (srcVal instanceof CharSequence) { + String stringVal = srcVal.toString(); + ParsePosition pos = new ParsePosition(0); + Number number = numberFormat.get().parse(stringVal, pos); + if (pos.getIndex() != stringVal.length()) { + log.debug("value '{}' is not parseable, thus not mutated; unparsed chars: '{}'", + new Object[] { srcVal, stringVal.substring(pos.getIndex())}); + return SKIP_FIELD_VALUE_LIST_SINGLETON; + } + return number.longValue(); + } + if (srcVal instanceof Long) { + return srcVal; + } + return SKIP_FIELD_VALUE_LIST_SINGLETON; + } + } + + @Override + protected boolean isSchemaFieldTypeCompatible(FieldType type) { + return type instanceof LongValueFieldType; + } +} diff --git a/solr/core/src/java/org/apache/solr/update/processor/ParseNumericFieldUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ParseNumericFieldUpdateProcessorFactory.java new file mode 100644 
index 00000000000..20bc67e91ee --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/ParseNumericFieldUpdateProcessorFactory.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.commons.lang.LocaleUtils; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; + +import java.util.Locale; + +/** + * Abstract base class for numeric parsing update processor factories. + * Subclasses can optionally configure a locale. If no locale is configured, + * then {@link Locale#ROOT} will be used. E.g. to configure the French/France + * locale: + * + *
+ * <processor class="solr.Parse[Type]FieldUpdateProcessorFactory">
+ *   <str name="locale">fr_FR</str>
+ *   [...]
+ * </processor>
+ * + *

+ * See {@link Locale} for a description of acceptable language, country (optional) + * and variant (optional) values, joined with underscore(s). + *

+ */ +public abstract class ParseNumericFieldUpdateProcessorFactory extends FieldMutatingUpdateProcessorFactory { + + private static final String LOCALE_PARAM = "locale"; + + protected Locale locale = Locale.ROOT; + + @Override + public void init(NamedList args) { + String localeParam = (String)args.remove(LOCALE_PARAM); + if (null != localeParam) { + locale = LocaleUtils.toLocale(localeParam); + } + super.init(args); + } + + /** + * Returns true if the given FieldType is compatible with this parsing factory. + */ + protected abstract boolean isSchemaFieldTypeCompatible(FieldType type); + + /** + * Returns true if the field doesn't match any schema field or dynamic field, + * or if the matched field's type is compatible + * @param core Where to get the current schema from + */ + @Override + public FieldMutatingUpdateProcessor.FieldNameSelector + getDefaultSelector(final SolrCore core) { + + return new FieldMutatingUpdateProcessor.FieldNameSelector() { + @Override + public boolean shouldMutate(final String fieldName) { + final IndexSchema schema = core.getLatestSchema(); + FieldType type = schema.getFieldTypeNoEx(fieldName); + return (null == type) || isSchemaFieldTypeCompatible(type); + } + }; + } +} diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml new file mode 100644 index 00000000000..3c41f507158 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-parsing-update-processor-chains.xml @@ -0,0 +1,230 @@ + + + + + + + ${tests.luceneMatchVersion:LUCENE_CURRENT} + + + + + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + + + + + + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + + + + + + false + yyyy-MM-dd'T'HH:mm:ss.SSSZ + + + + + + solr.DateField + solr.TrieDateField + yyyy-MM-dd'T'HH:mm:ss.SSSZ + + + + + + America/New_York + en_US + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm:ss.SSS + + + + + + + America/Los_Angeles + + 
MM/dd/yyyy + + + + + + + UTC + en_US + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm:ss,SSSZ + yyyy-MM-dd'T'HH:mm:ss.SSS + yyyy-MM-dd'T'HH:mm:ss,SSS + yyyy-MM-dd'T'HH:mm:ssZ + yyyy-MM-dd'T'HH:mm:ss + yyyy-MM-dd'T'HH:mmZ + yyyy-MM-dd'T'HH:mm + yyyy-MM-dd HH:mm:ss.SSSZ + yyyy-MM-dd HH:mm:ss,SSSZ + yyyy-MM-dd HH:mm:ss.SSS + yyyy-MM-dd HH:mm:ss,SSS + yyyy-MM-dd HH:mm:ssZ + yyyy-MM-dd HH:mm:ss + yyyy-MM-dd HH:mmZ + yyyy-MM-dd HH:mm + yyyy-MM-dd hh:mm a + yyyy-MM-dd hh:mma + yyyy-MM-dd + EEE MMM dd HH:mm:ss Z yyyy + EEE MMM dd HH:mm:ss yyyy Z + EEE MMM dd HH:mm:ss yyyy + EEE, dd MMM yyyy HH:mm:ss Z + EEEE, dd-MMM-yy HH:mm:ss Z + EEEE, MMMM dd, yyyy + MMMM dd, yyyy + MMM. dd, yyyy + + + + + + + UTC + fr + 'le' EEEE dd MMMM yyyy + + + + + + + + + + + + + + + ru_RU + + + + + + + + + + + + + + + ru_RU + + + + + + + + + + + + + + + fr_FR + + + + + + + + + + + + + + + fr_FR + + + + + + + + + + + + + + + false + + true + YES + on + + + false + no + oFF + + + + + + + yup + nope + + + + + + + + + + + + + + + + yyyy-MM-dd + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm + + + + diff --git a/solr/core/src/test/org/apache/solr/update/processor/ParsingFieldUpdateProcessorsTest.java b/solr/core/src/test/org/apache/solr/update/processor/ParsingFieldUpdateProcessorsTest.java new file mode 100644 index 00000000000..4be6c21580d --- /dev/null +++ b/solr/core/src/test/org/apache/solr/update/processor/ParsingFieldUpdateProcessorsTest.java @@ -0,0 +1,910 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.schema.IndexSchema; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.joda.time.format.ISODateTimeFormat; +import org.junit.BeforeClass; + +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +/** + * Tests for the field mutating update processors + * that parse Dates, Longs, Doubles, and Booleans. 
+ */ +public class ParsingFieldUpdateProcessorsTest extends UpdateProcessorTestBase { + private static final double EPSILON = 1E-15; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-parsing-update-processor-chains.xml", "schema12.xml"); + } + + public void testParseDateRoundTrip() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("date_dt")); // should match "*_dt" dynamic field + String dateString = "2010-11-12T13:14:15.168Z"; + SolrInputDocument d = processAdd("parse-date", doc(f("id", "9"), f("date_dt", dateString))); + assertNotNull(d); + DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateTime(); + DateTime dateTime = dateTimeFormatter.parseDateTime(dateString); + assertTrue(d.getFieldValue("date_dt") instanceof Date); + assertEquals(dateTime.getMillis(), ((Date) d.getFieldValue("date_dt")).getTime()); + assertU(commit()); + assertQ(req("id:9"), "//date[@name='date_dt'][.='" + dateString + "']"); + } + + public void testParseTrieDateRoundTrip() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("date_tdt")); // should match "*_tdt" dynamic field + String dateString = "2010-11-12T13:14:15.168Z"; + SolrInputDocument d = processAdd("parse-date", doc(f("id", "39"), f("date_tdt", dateString))); + assertNotNull(d); + DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateTime(); + DateTime dateTime = dateTimeFormatter.parseDateTime(dateString); + assertTrue(d.getFieldValue("date_tdt") instanceof Date); + assertEquals(dateTime.getMillis(), ((Date) d.getFieldValue("date_tdt")).getTime()); + assertU(commit()); + assertQ(req("id:39"), "//date[@name='date_tdt'][.='" + dateString + "']"); + } + + + public void testParseDateFieldNotInSchema() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNull(schema.getFieldOrNull("not_in_schema")); + String dateString = 
"2010-11-12T13:14:15.168Z"; + DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateTime(); + DateTime dateTime = dateTimeFormatter.parseDateTime(dateString); + + SolrInputDocument d = processAdd("parse-date-no-run-processor", + doc(f("id", "18"), f("not_in_schema", dateString))); + assertNotNull(d); + assertTrue(d.getFieldValue("not_in_schema") instanceof Date); + assertEquals(dateTime.getMillis(), ((Date)d.getFieldValue("not_in_schema")).getTime()); + + d = processAdd("parse-date-no-run-processor", + doc(f("id", "36"), f("not_in_schema", "not a date", dateString))); + assertNotNull(d); + for (Object val : d.getFieldValues("not_in_schema")) { + // check that nothing was mutated, since not all field values are parseable as dates + assertTrue(val instanceof String); + } + + d = processAdd("parse-date-no-run-processor", + doc(f("id", "72"), f("not_in_schema", dateString, "not a date"))); + assertNotNull(d); + for (Object val : d.getFieldValues("not_in_schema")) { + // check again that nothing was mutated, but with a valid date first this time + assertTrue(val instanceof String); + } + } + + public void testParseDateNonUTCdefaultTimeZoneRoundTrip() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("date_dt")); // should match "*_dt" dynamic field + String dateStringNoTimeZone = "2010-11-12T13:14:15.168"; + String dateStringUTC = dateStringNoTimeZone + "Z"; + + // dateStringNoTimeZone interpreted as being in timeZone America/New_York, then printed as UTC + String dateStringUSEasternTimeAsUTC = "2010-11-12T18:14:15.168Z"; + + SolrInputDocument d = processAdd + ("parse-date-non-UTC-defaultTimeZone", doc(f("id", "99"), f("dateUTC_dt", dateStringUTC), + f("dateNoTimeZone_dt", dateStringNoTimeZone))); + assertNotNull(d); + String pattern = "yyyy-MM-dd'T'HH:mm:ss.SSSZ"; + DateTimeFormatter dateTimeFormatterUTC = DateTimeFormat.forPattern(pattern); + DateTime dateTimeUTC = 
dateTimeFormatterUTC.parseDateTime(dateStringUTC); + assertTrue(d.getFieldValue("dateUTC_dt") instanceof Date); + assertTrue(d.getFieldValue("dateNoTimeZone_dt") instanceof Date); + assertEquals(dateTimeUTC.getMillis(), ((Date) d.getFieldValue("dateUTC_dt")).getTime()); + assertU(commit()); + assertQ(req("id:99") + ,"//date[@name='dateUTC_dt'][.='" + dateStringUTC + "']" + ,"//date[@name='dateNoTimeZone_dt'][.='" + dateStringUSEasternTimeAsUTC + "']"); + } + + public void testParseDateExplicitNotInSchemaSelector() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNull(schema.getFieldOrNull("not_in_schema")); + String dateString = "2010-11-12T13:14:15.168Z"; + DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateTime(); + DateTime dateTime = dateTimeFormatter.parseDateTime(dateString); + + SolrInputDocument d = processAdd("parse-date-explicit-not-in-schema-selector-no-run-processor", + doc(f("id", "88"), f("not_in_schema", dateString))); + assertNotNull(d); + assertTrue(d.getFieldValue("not_in_schema") instanceof Date); + assertEquals(dateTime.getMillis(), ((Date)d.getFieldValue("not_in_schema")).getTime()); + } + + public void testParseDateExplicitTypeClassSelector() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("date_dt")); + String dateString = "2010-11-12T13:14:15.168Z"; + DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateTime(); + DateTime dateTime = dateTimeFormatter.parseDateTime(dateString); + + SolrInputDocument d = processAdd("parse-date-explicit-typeclass-selector-no-run-processor", + doc(f("id", "77"), f("date_dt", dateString))); + assertNotNull(d); + assertTrue(d.getFieldValue("date_dt") instanceof Date); + assertEquals(dateTime.getMillis(), ((Date)d.getFieldValue("date_dt")).getTime()); + } + + public void testParseUSPacificDate() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + 
assertNull(schema.getFieldOrNull("not_in_schema")); + String dateString = "8/9/2010"; // Interpreted as 00:00 US Pacific Daylight Time (UTC-07:00), i.e. 07:00 UTC + String dateStringUTC = "2010-08-09T07:00:00.000Z"; + SolrInputDocument d = processAdd("US-Pacific-parse-date-no-run-processor", + doc(f("id", "288"), f("not_in_schema", dateString))); + assertNotNull(d); + assertTrue(d.getFieldValue("not_in_schema") instanceof Date); + assertEquals(dateStringUTC, + (new DateTime(((Date)d.getFieldValue("not_in_schema")).getTime(),DateTimeZone.UTC)).toString()); + } + + public void testParseDateFormats() throws Exception { + String[] formatExamples = { + "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "2010-01-15T00:00:00.000Z", + "yyyy-MM-dd'T'HH:mm:ss,SSSZ", "2010-01-15T00:00:00,000Z", + "yyyy-MM-dd'T'HH:mm:ss.SSS", "2010-01-15T00:00:00.000", + "yyyy-MM-dd'T'HH:mm:ss,SSS", "2010-01-15T00:00:00,000", + "yyyy-MM-dd'T'HH:mm:ssZ", "2010-01-15T00:00:00Z", + "yyyy-MM-dd'T'HH:mm:ss", "2010-01-15T00:00:00", + "yyyy-MM-dd'T'HH:mmZ", "2010-01-15T00:00Z", + "yyyy-MM-dd'T'HH:mm", "2010-01-15T00:00", + "yyyy-MM-dd HH:mm:ss.SSSZ", "2010-01-15 00:00:00.000Z", + "yyyy-MM-dd HH:mm:ss,SSSZ", "2010-01-15 00:00:00,000Z", + "yyyy-MM-dd HH:mm:ss.SSS", "2010-01-15 00:00:00.000", + "yyyy-MM-dd HH:mm:ss,SSS", "2010-01-15 00:00:00,000", + "yyyy-MM-dd HH:mm:ssZ", "2010-01-15 00:00:00Z", + "yyyy-MM-dd HH:mm:ss", "2010-01-15 00:00:00", + "yyyy-MM-dd HH:mmZ", "2010-01-15 00:00Z", + "yyyy-MM-dd HH:mm", "2010-01-15 00:00", + "yyyy-MM-dd hh:mm a", "2010-01-15 12:00 AM", + "yyyy-MM-dd hh:mma", "2010-01-15 12:00AM", + "yyyy-MM-dd", "2010-01-15", + "EEE MMM dd HH:mm:ss Z yyyy", "Fri Jan 15 00:00:00 +0000 2010", + "EEE MMM dd HH:mm:ss yyyy Z", "Fri Jan 15 00:00:00 2010 +00:00", + "EEE MMM dd HH:mm:ss yyyy", "Fri Jan 15 00:00:00 2010", + "EEE, dd MMM yyyy HH:mm:ss Z", "Fri, 15 Jan 2010 00:00:00 +00:00", + "EEEE, dd-MMM-yy HH:mm:ss Z", "Friday, 15-Jan-10 00:00:00 +00:00", + "EEEE, MMMM dd, yyyy", "Friday, January 15, 2010", + "MMMM dd, 
yyyy", "January 15, 2010", + "MMM. dd, yyyy", "Jan. 15, 2010" + }; + + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("dateUTC_dt")); // should match "*_dt" dynamic field + + String dateTimePattern = "yyyy-MM-dd'T'HH:mm:ss.SSSZ"; + DateTimeFormatter dateTimeFormatterUTC = DateTimeFormat.forPattern(dateTimePattern); + DateTime dateTimeUTC = dateTimeFormatterUTC.parseDateTime(formatExamples[1]); + + for (int i = 0 ; i < formatExamples.length ; i += 2) { + String format = formatExamples[i]; + String dateString = formatExamples[i + 1]; + String id = "95" + i; + SolrInputDocument d = processAdd("parse-date-UTC-defaultTimeZone-no-run-processor", + doc(f("id", id), f("dateUTC_dt", dateString))); + assertNotNull(d); + assertTrue("date '" + dateString + "' with format '" + format + "' is not mutated to a Date", + d.getFieldValue("dateUTC_dt") instanceof Date); + assertEquals("date '" + dateString + "' with format '" + format + "' mismatched milliseconds", + dateTimeUTC.getMillis(), ((Date)d.getFieldValue("dateUTC_dt")).getTime()); + } + } + + public void testParseFrenchDate() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNull(schema.getFieldOrNull("not_in_schema")); + String frenchDateString = "le vendredi 15 janvier 2010"; + String dateString = "2010-01-15T00:00:00.000Z"; + DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateTime(); + DateTime dateTime = dateTimeFormatter.parseDateTime(dateString); + SolrInputDocument d = processAdd("parse-french-date-UTC-defaultTimeZone-no-run-processor", + doc(f("id", "88"), f("not_in_schema", frenchDateString))); + assertNotNull(d); + assertTrue(d.getFieldValue("not_in_schema") instanceof Date); + assertEquals(dateTime.getMillis(), ((Date)d.getFieldValue("not_in_schema")).getTime()); + } + + public void testFailedParseMixedDate() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + 
assertNull(schema.getFieldOrNull("not_in_schema")); + DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateOptionalTimeParser().withZoneUTC(); + Map mixed = new HashMap(); + String[] dateStrings = { "2020-05-13T18:47", "1989-12-14", "1682-07-22T18:33:00.000Z" }; + for (String dateString : dateStrings) { + mixed.put(dateTimeFormatter.parseDateTime(dateString).toDate(), dateString); + } + Double extraDouble = 29.554d; + mixed.put(extraDouble, extraDouble); // Double-typed field value + SolrInputDocument d = processAdd("parse-date-no-run-processor", + doc(f("id", "7201"), f("not_in_schema", mixed.values()))); + assertNotNull(d); + boolean foundDouble = false; + for (Object o : d.getFieldValues("not_in_schema")) { + if (extraDouble == o) { + foundDouble = true; + } else { + assertTrue(o instanceof String); + } + mixed.values().remove(o); + } + assertTrue(foundDouble); + assertTrue(mixed.isEmpty()); + } + + public void testParseIntRoundTrip() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("int1_i")); // should match dynamic field "*_i" + assertNotNull(schema.getFieldOrNull("int2_i")); // should match dynamic field "*_i" + int value = 1089883491; + String intString1 = "1089883491"; + String intString2 = "1,089,883,491"; + SolrInputDocument d = processAdd("parse-int", + doc(f("id", "113"), f("int1_i", intString1), f("int2_i", intString2))); + assertNotNull(d); + assertTrue(d.getFieldValue("int1_i") instanceof Integer); + assertEquals(value, ((Integer)d.getFieldValue("int1_i")).intValue()); + assertTrue(d.getFieldValue("int2_i") instanceof Integer); + assertEquals(value, ((Integer)d.getFieldValue("int2_i")).intValue()); + + assertU(commit()); + assertQ(req("id:113") + ,"//int[@name='int1_i'][.='" + value + "']" + ,"//int[@name='int2_i'][.='" + value + "']"); + } + + public void testParseIntNonRootLocale() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + 
assertNotNull(schema.getFieldOrNull("int_i")); // should match dynamic field "*_i" + assertNull(schema.getFieldOrNull("not_in_schema")); + int value = 1089883491; + String intString1 = "1089883491"; + String intString2 = "1 089 883 491"; // no-break space U+00A0 + SolrInputDocument d = processAdd("parse-int-russian-no-run-processor", + doc(f("id", "113"), f("int_i", intString1), f("not_in_schema", intString2))); + assertNotNull(d); + assertTrue(d.getFieldValue("int_i") instanceof Integer); + assertEquals(value, ((Integer)d.getFieldValue("int_i")).intValue()); + assertTrue(d.getFieldValue("not_in_schema") instanceof Integer); + assertEquals(value, ((Integer)d.getFieldValue("not_in_schema")).intValue()); + } + + public void testParseTrieIntRoundTrip() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("int1_ti")); // should match dynamic field "*_ti" + assertNotNull(schema.getFieldOrNull("int2_ti")); // should match dynamic field "*_ti" + int value = 1089883491; + String intString1 = "1089883491"; + String intString2 = "1,089,883,491"; + SolrInputDocument d = processAdd("parse-int", + doc(f("id", "113"), f("int1_ti", intString1), f("int2_ti", intString2))); + assertNotNull(d); + assertTrue(d.getFieldValue("int1_ti") instanceof Integer); + assertEquals(value, ((Integer)d.getFieldValue("int1_ti")).intValue()); + assertTrue(d.getFieldValue("int2_ti") instanceof Integer); + assertEquals(value, ((Integer)d.getFieldValue("int2_ti")).intValue()); + + assertU(commit()); + assertQ(req("id:113") + ,"//int[@name='int1_ti'][.='" + value + "']" + ,"//int[@name='int2_ti'][.='" + value + "']"); + } + + public void testIntOverflow() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNull(schema.getFieldOrNull("not_in_schema1")); + assertNull(schema.getFieldOrNull("not_in_schema2")); + long longValue1 = (long)Integer.MAX_VALUE + 100L; + long longValue2 = (long)Integer.MIN_VALUE - 100L; + String 
longString1 = Long.toString(longValue1); + String longString2 = Long.toString(longValue2); + SolrInputDocument d = processAdd("parse-int-no-run-processor", + doc(f("id", "282"), f("not_in_schema1", longString1), f("not_in_schema2", longString2))); + assertNotNull(d); + assertTrue(d.getFieldValue("not_in_schema1") instanceof String); + assertTrue(d.getFieldValue("not_in_schema2") instanceof String); + } + + public void testFailedParseMixedInt() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNull(schema.getFieldOrNull("not_in_schema")); + Map mixed = new HashMap(); + Float floatVal = 294423.0f; + mixed.put(85, "85"); + mixed.put(floatVal, floatVal); // Float-typed field value + mixed.put(-2894518, "-2,894,518"); + mixed.put(1879472193, "1,879,472,193"); + SolrInputDocument d = processAdd("parse-int-no-run-processor", + doc(f("id", "7202"), f("not_in_schema", mixed.values()))); + assertNotNull(d); + boolean foundFloat = false; + for (Object o : d.getFieldValues("not_in_schema")) { + if (floatVal == o) { + foundFloat = true; + } else { + assertTrue(o instanceof String); + } + mixed.values().remove(o); + } + assertTrue(foundFloat); + assertTrue(mixed.isEmpty()); + } + + public void testParseLongRoundTrip() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("long1_l")); // should match dynamic field "*_l" + assertNotNull(schema.getFieldOrNull("long2_l")); // should match dynamic field "*_l" + long value = 1089883491L; + String longString1 = "1089883491"; + String longString2 = "1,089,883,491"; + SolrInputDocument d = processAdd("parse-long", + doc(f("id", "113"), f("long1_l", longString1), f("long2_l", longString2))); + assertNotNull(d); + assertTrue(d.getFieldValue("long1_l") instanceof Long); + assertEquals(value, ((Long) d.getFieldValue("long1_l")).longValue()); + assertTrue(d.getFieldValue("long2_l") instanceof Long); + assertEquals(value, 
((Long)d.getFieldValue("long2_l")).longValue()); + + assertU(commit()); + assertQ(req("id:113") + ,"//long[@name='long1_l'][.='" + value + "']" + ,"//long[@name='long2_l'][.='" + value + "']"); + } + + public void testParseLongNonRootLocale() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("long_l")); // should match dynamic field "*_l" + assertNull(schema.getFieldOrNull("not_in_schema")); + long value = 1089883491L; + String longString1 = "1089883491"; + String longString2 = "1 089 883 491"; // no-break space U+00A0 + SolrInputDocument d = processAdd("parse-long-russian-no-run-processor", + doc(f("id", "113"), f("long_l", longString1), f("not_in_schema", longString2))); + assertNotNull(d); + assertTrue(d.getFieldValue("long_l") instanceof Long); + assertEquals(value, ((Long)d.getFieldValue("long_l")).longValue()); + assertTrue(d.getFieldValue("not_in_schema") instanceof Long); + assertEquals(value, ((Long)d.getFieldValue("not_in_schema")).longValue()); + } + + public void testParseTrieLongRoundTrip() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("long1_tl")); // should match dynamic field "*_tl" + assertNotNull(schema.getFieldOrNull("long2_tl")); // should match dynamic field "*_tl" + long value = 1089883491L; + String longString1 = "1089883491"; + String longString2 = "1,089,883,491"; + SolrInputDocument d = processAdd("parse-long", + doc(f("id", "113"), f("long1_tl", longString1), f("long2_tl", longString2))); + assertNotNull(d); + assertTrue(d.getFieldValue("long1_tl") instanceof Long); + assertEquals(value, ((Long)d.getFieldValue("long1_tl")).longValue()); + assertTrue(d.getFieldValue("long2_tl") instanceof Long); + assertEquals(value, ((Long)d.getFieldValue("long2_tl")).longValue()); + + assertU(commit()); + assertQ(req("id:113") + ,"//long[@name='long1_tl'][.='" + value + "']" + ,"//long[@name='long2_tl'][.='" + value + "']"); + } + 
+ public void testFailedParseMixedLong() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNull(schema.getFieldOrNull("not_in_schema")); + Map mixed = new HashMap(); + Float floatVal = 294423.0f; + mixed.put(85L, "85"); + mixed.put(floatVal, floatVal); // Float-typed field value + mixed.put(-2894518L, "-2,894,518"); + mixed.put(1879472193L, "1,879,472,193"); + SolrInputDocument d = processAdd("parse-long-no-run-processor", + doc(f("id", "7204"), f("not_in_schema", mixed.values()))); + assertNotNull(d); + boolean foundFloat = false; + for (Object o : d.getFieldValues("not_in_schema")) { + if (floatVal == o) { + foundFloat = true; + } else { + assertTrue(o instanceof String); + } + mixed.values().remove(o); + } + assertTrue(foundFloat); + assertTrue(mixed.isEmpty()); + } + + public void testParseFloatRoundTrip() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("float1_f")); // should match dynamic field "*_f" + assertNotNull(schema.getFieldOrNull("float2_f")); // should match dynamic field "*_f" + float value = 10898.83491f; + String floatString1 = "10898.83491"; + String floatString2 = "10,898.83491"; + SolrInputDocument d = processAdd("parse-float", + doc(f("id", "128"), f("float1_f", floatString1), f("float2_f", floatString2))); + assertNotNull(d); + assertTrue(d.getFieldValue("float1_f") instanceof Float); + assertEquals(value, (Float)d.getFieldValue("float1_f"), EPSILON); + assertTrue(d.getFieldValue("float2_f") instanceof Float); + assertEquals(value, (Float)d.getFieldValue("float2_f"), EPSILON); + + assertU(commit()); + assertQ(req("id:128") + ,"//float[@name='float1_f'][.='" + value + "']" + ,"//float[@name='float2_f'][.='" + value + "']"); + } + + public void testParseFloatNonRootLocale() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("float_f")); // should match dynamic field "*_f" + 
assertNull(schema.getFieldOrNull("not_in_schema")); + float value = 10898.83491f; + String floatString1 = "10898,83491"; + String floatString2 = "10 898,83491"; // no-break space: U+00A0 + SolrInputDocument d = processAdd("parse-float-french-no-run-processor", + doc(f("id", "140"), f("float_f", floatString1), + f("not_in_schema", floatString2))); + assertNotNull(d); + assertTrue(d.getFieldValue("float_f") instanceof Float); + assertEquals(value, (Float)d.getFieldValue("float_f"), EPSILON); + assertTrue(d.getFieldValue("not_in_schema") instanceof Float); + assertEquals(value, (Float)d.getFieldValue("not_in_schema"), EPSILON); + } + + public void testParseTrieFloatRoundTrip() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("float1_tf")); // should match dynamic field "*_tf" + assertNotNull(schema.getFieldOrNull("float2_tf")); // should match dynamic field "*_tf" + float value = 10898.83491f; + String floatString1 = "10898.83491"; + String floatString2 = "10,898.83491"; + SolrInputDocument d = processAdd("parse-float", + doc(f("id", "728"), f("float1_tf", floatString1), f("float2_tf", floatString2))); + assertNotNull(d); + assertTrue(d.getFieldValue("float1_tf") instanceof Float); + assertEquals(value, (Float)d.getFieldValue("float1_tf"), EPSILON); + assertTrue(d.getFieldValue("float2_tf") instanceof Float); + assertEquals(value, (Float)d.getFieldValue("float2_tf"), EPSILON); + + assertU(commit()); + assertQ(req("id:728") + ,"//float[@name='float1_tf'][.='" + value + "']" + ,"//float[@name='float2_tf'][.='" + value + "']"); + } + + public void testMixedFloats() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("float_tf")); // should match dynamic field "*_tf" + Map mixedFloats = new HashMap(); + mixedFloats.put(85.0f, "85"); + mixedFloats.put(2894518.0f, "2,894,518"); + mixedFloats.put(2.94423E-9f, 2.94423E-9f); // Float-typed field value + 
mixedFloats.put(48794721.937f, "48,794,721.937"); + SolrInputDocument d = processAdd("parse-float-no-run-processor", + doc(f("id", "342"), f("float_tf", mixedFloats.values()))); + assertNotNull(d); + for (Object o : d.getFieldValues("float_tf")) { + assertTrue(o instanceof Float); + mixedFloats.remove(o); + } + assertTrue(mixedFloats.isEmpty()); + } + + public void testFailedParseMixedFloat() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNull(schema.getFieldOrNull("not_in_schema")); + Map mixed = new HashMap(); + Long longVal = 294423L; + mixed.put(85L, "85"); + mixed.put(longVal, longVal); // Long-typed field value + mixed.put(-2894518L, "-2,894,518"); + mixed.put(1879472193L, "1,879,472,193"); + SolrInputDocument d = processAdd("parse-float-no-run-processor", + doc(f("id", "7205"), f("not_in_schema", mixed.values()))); + assertNotNull(d); + boolean foundLong = false; + for (Object o : d.getFieldValues("not_in_schema")) { + if (longVal == o) { + foundLong = true; + } else { + assertTrue(o instanceof String); + } + mixed.values().remove(o); + } + assertTrue(foundLong); + assertTrue(mixed.isEmpty()); + } + + public void testParseDoubleRoundTrip() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("double1_d")); // should match dynamic field "*_d" + assertNotNull(schema.getFieldOrNull("double2_d")); // should match dynamic field "*_d" + double value = 10898.83491; + String doubleString1 = "10898.83491"; + String doubleString2 = "10,898.83491"; + SolrInputDocument d = processAdd("parse-double", + doc(f("id", "128"), f("double1_d", doubleString1), f("double2_d", doubleString2))); + assertNotNull(d); + assertTrue(d.getFieldValue("double1_d") instanceof Double); + assertEquals(value, (Double)d.getFieldValue("double1_d"), EPSILON); + assertTrue(d.getFieldValue("double2_d") instanceof Double); + assertEquals(value, (Double)d.getFieldValue("double2_d"), EPSILON); + + 
assertU(commit()); + assertQ(req("id:128") + ,"//double[@name='double1_d'][.='" + value + "']" + ,"//double[@name='double2_d'][.='" + value + "']"); + } + + public void testParseDoubleNonRootLocale() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("double_d")); // should match dynamic field "*_d" + assertNull(schema.getFieldOrNull("not_in_schema")); + double value = 10898.83491; + String doubleString1 = "10898,83491"; + String doubleString2 = "10 898,83491"; // no-break space: U+00A0 + SolrInputDocument d = processAdd("parse-double-french-no-run-processor", + doc(f("id", "140"), f("double_d", doubleString1), + f("not_in_schema", doubleString2))); + assertNotNull(d); + assertTrue(d.getFieldValue("double_d") instanceof Double); + assertEquals(value, (Double)d.getFieldValue("double_d"), EPSILON); + assertTrue(d.getFieldValue("not_in_schema") instanceof Double); + assertEquals(value, (Double)d.getFieldValue("not_in_schema"), EPSILON); + } + + public void testParseTrieDoubleRoundTrip() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("double1_td")); // should match dynamic field "*_td" + assertNotNull(schema.getFieldOrNull("double2_td")); // should match dynamic field "*_td" + double value = 10898.83491; + String doubleString1 = "10898.83491"; + String doubleString2 = "10,898.83491"; + SolrInputDocument d = processAdd("parse-double", + doc(f("id", "728"), f("double1_td", doubleString1), f("double2_td", doubleString2))); + assertNotNull(d); + assertTrue(d.getFieldValue("double1_td") instanceof Double); + assertEquals(value, (Double)d.getFieldValue("double1_td"), EPSILON); + assertTrue(d.getFieldValue("double2_td") instanceof Double); + assertEquals(value, (Double)d.getFieldValue("double2_td"), EPSILON); + + assertU(commit()); + assertQ(req("id:728") + ,"//double[@name='double1_td'][.='" + value + "']" + ,"//double[@name='double2_td'][.='" + value + 
"']"); + } + + public void testFailedParseMixedDouble() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNull(schema.getFieldOrNull("not_in_schema")); + Map mixed = new HashMap(); + Long longVal = 294423L; + mixed.put(85, "85.0"); + mixed.put(longVal, longVal); // Long-typed field value + mixed.put(-2894.518, "-2,894.518"); + mixed.put(187947.2193, "187,947.2193"); + SolrInputDocument d = processAdd("parse-double-no-run-processor", + doc(f("id", "7206"), f("not_in_schema", mixed.values()))); + assertNotNull(d); + boolean foundLong = false; + for (Object o : d.getFieldValues("not_in_schema")) { + if (longVal == o) { + foundLong = true; + } else { + assertTrue(o instanceof String); + } + mixed.values().remove(o); + } + assertTrue(foundLong); + assertTrue(mixed.isEmpty()); + } + + public void testParseBooleanRoundTrip() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("boolean1_b")); // should match dynamic field "*_b" + assertNotNull(schema.getFieldOrNull("boolean2_b")); // should match dynamic field "*_b" + boolean value1 = true; + boolean value2 = false; + SolrInputDocument d = processAdd("parse-boolean", + doc(f("id", "141"), f("boolean1_b", value1), f("boolean2_b", value2))); + assertNotNull(d); + assertTrue(d.getFieldValue("boolean1_b") instanceof Boolean); + assertEquals(value1, d.getFieldValue("boolean1_b")); + assertTrue(d.getFieldValue("boolean2_b") instanceof Boolean); + assertEquals(value2, d.getFieldValue("boolean2_b")); + + assertU(commit()); + assertQ(req("id:141") + ,"//bool[@name='boolean1_b'][.='" + value1 + "']" + ,"//bool[@name='boolean2_b'][.='" + value2 + "']"); + } + + public void testParseAlternateValueBooleans() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("boolean1_b")); // should match dynamic field "*_b" + assertNotNull(schema.getFieldOrNull("boolean2_b")); // should match dynamic 
field "*_b" + assertNotNull(schema.getFieldOrNull("boolean3_b")); // should match dynamic field "*_b" + assertNotNull(schema.getFieldOrNull("boolean4_b")); // should match dynamic field "*_b" + assertNotNull(schema.getFieldOrNull("boolean5_b")); // should match dynamic field "*_b" + assertNull(schema.getFieldOrNull("not_in_schema")); + boolean[] values = { true, true, true, false, false, false }; + String[] stringValues = { "on", "yes", "True", "Off", "no", "FALSE" }; + String[] fieldNames = { "boolean1_b", "boolean2_b", "boolean3_b", "boolean4_b", "boolean5_b", "not_in_schema" }; + SolrInputDocument d = doc(f("id", "55")); + for (int i = 0 ; i < values.length ; ++i) { + d.addField(fieldNames[i], stringValues[i]); + } + d = processAdd("parse-boolean-alternate-values-no-run-processor", d); + assertNotNull(d); + + for (int i = 0 ; i < values.length ; ++i) { + assertTrue(d.getFieldValue(fieldNames[i]) instanceof Boolean); + assertEquals(values[i], d.getFieldValue(fieldNames[i])); + } + } + + public void testParseAlternateSingleValuesBooleans() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNotNull(schema.getFieldOrNull("boolean1_b")); // should match dynamic field "*_b" + assertNotNull(schema.getFieldOrNull("boolean2_b")); // should match dynamic field "*_b" + boolean[] values = { true, false }; + String[] stringValues = { "yup", "nope" }; + String[] fieldNames = { "boolean1_b", "boolean2_b" }; + SolrInputDocument d = doc(f("id", "59")); + for (int i = 0 ; i < values.length ; ++i) { + d.addField(fieldNames[i], stringValues[i]); + } + d = processAdd("parse-boolean-alternate-single-values-no-run-processor", d); + assertNotNull(d); + + for (int i = 0 ; i < values.length ; ++i) { + assertTrue(d.getFieldValue(fieldNames[i]) instanceof Boolean); + assertEquals(values[i], d.getFieldValue(fieldNames[i])); + } + + // Standard boolean values should not be mutated, since they're not configured + stringValues = new String[] { "true", "false" }; + 
d = doc(f("id", "593")); + for (int i = 0 ; i < values.length ; ++i) { + d.addField(fieldNames[i], stringValues[i]); + } + d = processAdd("parse-boolean-alternate-single-values-no-run-processor", d); + assertNotNull(d); + + for (int i = 0 ; i < values.length ; ++i) { + assertTrue(d.getFieldValue(fieldNames[i]) instanceof String); + } + } + + public void testFailedParseMixedBoolean() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + assertNull(schema.getFieldOrNull("not_in_schema")); + Map mixed = new HashMap(); + Long longVal = 294423L; + mixed.put(true, "true"); + mixed.put(longVal, longVal); // Long-typed field value + mixed.put(false, "false"); + mixed.put(true, "true"); + SolrInputDocument d = processAdd("parse-boolean-no-run-processor", + doc(f("id", "7207"), f("not_in_schema", mixed.values()))); + assertNotNull(d); + boolean foundLong = false; + for (Object o : d.getFieldValues("not_in_schema")) { + if (longVal == o) { + foundLong = true; + } else { + assertTrue(o instanceof String); + } + mixed.values().remove(o); + } + assertTrue(foundLong); + assertTrue(mixed.isEmpty()); + } + + public void testCascadingParsers() throws Exception { + IndexSchema schema = h.getCore().getLatestSchema(); + final String fieldName = "not_in_schema"; + assertNull(schema.getFieldOrNull(fieldName)); + SolrInputDocument d = null; + String chain = "cascading-parsers-no-run-processor"; + + Map booleans = new HashMap(); + booleans.put(true, "truE"); + booleans.put(false, "False"); + d = processAdd(chain, doc(f("id", "341"), f(fieldName, booleans.values()))); + assertNotNull(d); + for (Object o : d.getFieldValues(fieldName)) { + assertTrue(o instanceof Boolean); + booleans.remove(o); + } + assertTrue(booleans.isEmpty()); + + Map ints = new HashMap(); + ints.put(2, "2"); + ints.put(50928, "50928"); + ints.put(86942008, "86,942,008"); + d = processAdd(chain, doc(f("id", "333"), f(fieldName, ints.values()))); + assertNotNull(d); + for (Object o : 
d.getFieldValues(fieldName)) { + assertTrue(o instanceof Integer); + ints.remove(o); + } + assertTrue(ints.isEmpty()); + + Map longs = new HashMap(); + longs.put(2L, "2"); + longs.put(50928L, "50928"); + longs.put(86942008987654L, "86,942,008,987,654"); + d = processAdd(chain, doc(f("id", "342"), f(fieldName, longs.values()))); + assertNotNull(d); + for (Object o : d.getFieldValues(fieldName)) { + assertTrue(o instanceof Long); + longs.remove(o); + } + assertTrue(longs.isEmpty()); + + /* + // Disabling this test because unlike Integer/Long, Float parsing can perform + // rounding to make values fit. See + Map floats = new HashMap(); + floats.put(2.0, "2."); + floats.put(509.28, "509.28"); + floats.put(86942.008, "86,942.008"); + d = processAdd(chain, doc(f("id", "342"), f(fieldName, floats.values()))); + assertNotNull(d); + for (Object o : d.getFieldValues(fieldName)) { + assertTrue(o instanceof float); + longs.remove(o); + } + */ + + Map doubles = new HashMap(); // every decimal string below is expected to come back as a Double + doubles.put(2.0, "2."); + doubles.put(509.28, "509.28"); + doubles.put(86942.008, "86,942.008"); + d = processAdd(chain, doc(f("id", "342"), f(fieldName, doubles.values()))); + assertNotNull(d); + for (Object o : d.getFieldValues(fieldName)) { + assertTrue(o instanceof Double); + doubles.remove(o); // tick off each expected value so the check below catches missing ones + } + assertTrue(doubles.isEmpty()); + DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateOptionalTimeParser().withZoneUTC(); + Map dates = new HashMap(); + String[] dateStrings = { "2020-05-13T18:47", "1989-12-14", "1682-07-22T18:33:00.000Z" }; + for (String dateString : dateStrings) { + dates.put(dateTimeFormatter.parseDateTime(dateString).toDate(), dateString); + } + d = processAdd(chain, doc(f("id", "343"), f(fieldName, dates.values()))); + assertNotNull(d); + for (Object o : d.getFieldValues(fieldName)) { + assertTrue(o instanceof Date); + dates.remove(o); + } + assertTrue(dates.isEmpty()); + + Map mixedLongsAndDoubles = new LinkedHashMap(); // preserve order + mixedLongsAndDoubles.put(85.0, "85"); + 
mixedLongsAndDoubles.put(2.94423E-9, "2.94423E-9"); + mixedLongsAndDoubles.put(2894518.0, "2,894,518"); + mixedLongsAndDoubles.put(48794721.937, "48,794,721.937"); + d = processAdd(chain, doc(f("id", "344"), f(fieldName, mixedLongsAndDoubles.values()))); + assertNotNull(d); + for (Object o : d.getFieldValues(fieldName)) { + assertTrue(o instanceof Double); + mixedLongsAndDoubles.remove(o); + } + assertTrue(mixedLongsAndDoubles.isEmpty()); + + Set mixed = new HashSet(); + mixed.add("true"); + mixed.add("1682-07-22T18:33:00.000Z"); + mixed.add("2,894,518"); + mixed.add("308,393,131,379,900"); + mixed.add("48,794,721.937"); + d = processAdd(chain, doc(f("id", "345"), f(fieldName, mixed))); + assertNotNull(d); + for (Object o : d.getFieldValues(fieldName)) { + assertTrue(o instanceof String); + } + + Map mixedDoubles = new LinkedHashMap(); // preserve order + mixedDoubles.put(85.0, "85"); + mixedDoubles.put(2.94423E-9, 2.94423E-9); // Double-typed field value + mixedDoubles.put(2894518.0, "2,894,518"); + mixedDoubles.put(48794721.937, "48,794,721.937"); + d = processAdd(chain, doc(f("id", "3391"), f(fieldName, mixedDoubles.values()))); + assertNotNull(d); + for (Object o : d.getFieldValues(fieldName)) { + assertTrue(o instanceof Double); + mixedDoubles.remove(o); + } + assertTrue(mixedDoubles.isEmpty()); + + Map mixedInts = new LinkedHashMap(); // preserve order + mixedInts.put(85, "85"); + mixedInts.put(294423, 294423); // Integer-typed field value + mixedInts.put(-2894518, "-2,894,518"); + mixedInts.put(1879472193, "1,879,472,193"); + d = processAdd(chain, doc(f("id", "3392"), f(fieldName, mixedInts.values()))); + assertNotNull(d); + for (Object o : d.getFieldValues(fieldName)) { + assertTrue(o instanceof Integer); + mixedInts.remove(o); + } + assertTrue(mixedInts.isEmpty()); + + Map mixedLongs = new LinkedHashMap(); // preserve order + mixedLongs.put(85L, "85"); + mixedLongs.put(42944233L, 42944233L); // Long-typed field value + mixedLongs.put(2894518L, 
"2,894,518"); + mixedLongs.put(48794721937L, "48,794,721,937"); + d = processAdd(chain, doc(f("id", "3393"), f(fieldName, mixedLongs.values()))); + assertNotNull(d); + for (Object o : d.getFieldValues(fieldName)) { + assertTrue(o instanceof Long); + mixedLongs.remove(o); + } + assertTrue(mixedLongs.isEmpty()); + + Map mixedBooleans = new LinkedHashMap(); // preserve order + mixedBooleans.put(true, "true"); + mixedBooleans.put(false, false); // Boolean-typed field value + mixedBooleans.put(false, "false"); + mixedBooleans.put(true, "true"); + d = processAdd(chain, doc(f("id", "3394"), f(fieldName, mixedBooleans.values()))); + assertNotNull(d); + for (Object o : d.getFieldValues(fieldName)) { + assertTrue(o instanceof Boolean); + mixedBooleans.remove(o); + } + assertTrue(mixedBooleans.isEmpty()); + + dateTimeFormatter = ISODateTimeFormat.dateOptionalTimeParser().withZoneUTC(); + Map mixedDates = new HashMap(); + dateStrings = new String[] { "2020-05-13T18:47", "1989-12-14", "1682-07-22T18:33:00.000Z" }; + for (String dateString : dateStrings) { + mixedDates.put(dateTimeFormatter.parseDateTime(dateString).toDate(), dateString); + } + Date extraDate = dateTimeFormatter.parseDateTime("2003-04-24").toDate(); + mixedDates.put(extraDate, extraDate); // Date-typed field value + d = processAdd(chain, doc(f("id", "3395"), f(fieldName, mixedDates.values()))); + assertNotNull(d); + for (Object o : d.getFieldValues(fieldName)) { + assertTrue(o instanceof Date); + mixedDates.remove(o); + } + assertTrue(mixedDates.isEmpty()); + } +} diff --git a/solr/licenses/joda-time-2.2.jar.sha1 b/solr/licenses/joda-time-2.2.jar.sha1 new file mode 100644 index 00000000000..5e68639267a --- /dev/null +++ b/solr/licenses/joda-time-2.2.jar.sha1 @@ -0,0 +1 @@ +a5f29a7acaddea3f4af307e8cf2d0cc82645fd7d diff --git a/solr/licenses/joda-time-LICENSE-ASL.txt b/solr/licenses/joda-time-LICENSE-ASL.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ 
b/solr/licenses/joda-time-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/solr/licenses/joda-time-NOTICE.txt b/solr/licenses/joda-time-NOTICE.txt new file mode 100644 index 00000000000..dffbcf31cac --- /dev/null +++ b/solr/licenses/joda-time-NOTICE.txt @@ -0,0 +1,5 @@ +============================================================================= += NOTICE file corresponding to section 4d of the Apache License Version 2.0 = +============================================================================= +This product includes software developed by +Joda.org (http://www.joda.org/).