From be76c6845c2e6e9df8bd1be1544b131fedd03647 Mon Sep 17 00:00:00 2001 From: "Chris M. Hostetter" Date: Tue, 3 Nov 2015 00:43:17 +0000 Subject: [PATCH] SOLR-8113: CloneFieldUpdateProcessorFactory now supports choosing a dest field name based on a regex pattern and replacement init options. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1712195 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 2 + .../CloneFieldUpdateProcessorFactory.java | 387 ++++++++++++++---- .../solrconfig-update-processor-chains.xml | 96 ++++- .../CloneFieldUpdateProcessorFactoryTest.java | 377 +++++++++++++++++ .../FieldMutatingUpdateProcessorTest.java | 155 ------- 5 files changed, 783 insertions(+), 234 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactoryTest.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 725b71a6ba7..6fa157a09ef 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -190,6 +190,8 @@ New Features * SOLR-8217: JSON Facet API: add "method" param to terms/field facets to give an execution hint for what method should be used to facet. (yonik) +* SOLR-8113: CloneFieldUpdateProcessorFactory now supports choosing a "dest" field name based on a regex + pattern and replacement init options. 
(Gus Heck, hossman) Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactory.java index a68305314b4..290cf9dee49 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactory.java +++ b/solr/core/src/java/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactory.java @@ -21,8 +21,14 @@ import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; @@ -40,18 +46,25 @@ import org.slf4j.LoggerFactory; /** * Clones the values found in any matching source field into - * the configured dest field. + * a configured dest field. *

- * While the dest field must be a single <str>, - * the source fields can be configured as either: + * The source field(s) can be configured as either: *

* - *

- * If the dest field already exists in the document, then the + * + *

The dest field can be a single <str> + * containing the literal name of a destination field, or it may be a <lst> specifying a + * regex pattern and a replacement string. If the pattern + replacement option + * is used the pattern will be matched against all fields matched by the source selector, and the replacement + * string (including any capture groups specified from the pattern) will be evaluated using + * {@link Matcher#replaceAll(String)} to generate the literal name of the destination field. + *

+ * + *

If the resolved dest field already exists in the document, then the * values from the source fields will be added to it. The * "boost" value associated with the dest will not be changed, * and any boost specified on the source fields will be ignored. @@ -59,14 +72,23 @@ import org.slf4j.LoggerFactory; * newly created dest field will have the default boost of 1.0) *

*

- * In the example below, the category field will be cloned - * into the category_s field, both the authors and - * editors fields will be cloned into the contributors - * field, and any field with a name ending in _price -- except for - * list_price -- will be cloned into the all_prices - * field. + * In the example below: *

- * + * + * + * *
  *   <updateRequestProcessorChain name="multiple-clones">
  *     <processor class="solr.CloneFieldUpdateProcessorFactory">
@@ -82,30 +104,87 @@ import org.slf4j.LoggerFactory;
  *     </processor>
  *     <processor class="solr.CloneFieldUpdateProcessorFactory">
  *       <lst name="source">
- *         <str name="fieldRegex">.*_price</str>
+ *         <str name="fieldRegex">.*_price$</str>
  *         <lst name="exclude">
  *           <str name="fieldName">list_price</str>
  *         </lst>
  *       </lst>
  *       <str name="dest">all_prices</str>
  *     </processor>
+ *     <processor class="solr.processor.CloneFieldUpdateProcessorFactory">
+ *       <lst name="source">
+ *         <str name="fieldRegex">^feat(.*)s$</str>
+ *       </lst>
+ *       <lst name="dest">
+ *         <str name="pattern">^feat(.*)s$</str>
+ *         <str name="replacement">key_feat$1</str>
+ *       </lst>
+ *     </processor>
  *   </updateRequestProcessorChain>
  * 
+ * + *

+ * In common case situations where you wish to use a single regular expression as both a + * fieldRegex selector and a destination pattern, a "short hand" syntax + * is supported for convenience: The pattern and replacement may be specified + * at the top level, omitting source and dest declarations completely, and + * the pattern will be used to construct an equivalent source selector internally. + *

+ *

+ * For example, both of the following configurations are equivalent: + *

+ *
+ * <!-- full syntax -->
+ * <processor class="solr.processor.CloneFieldUpdateProcessorFactory">
+ *   <lst name="source">
+ *     <str name="fieldRegex">^feat(.*)s$</str>
+ *   </lst>
+ *   <lst name="dest">
+ *     <str name="pattern">^feat(.*)s$</str>
+ *     <str name="replacement">key_feat$1</str>
+ *   </lst>
+ * </processor>
+ * 
+ * <!-- syntactic sugar syntax -->
+ * <processor class="solr.processor.CloneFieldUpdateProcessorFactory">
+ *   <str name="pattern">^feat(.*)s$</str>
+ *   <str name="replacement">key_feat$1</str>
+ * </processor>
+ * 
+ * + *

+ * When cloning multiple fields (or a single multivalued field) into a single valued field, one of the + * {@link FieldValueSubsetUpdateProcessorFactory} implementations configured after the + * CloneFieldUpdateProcessorFactory can be useful to reduce the list of values down to a + * single value. + *

+ * + * @see FieldValueSubsetUpdateProcessorFactory */ public class CloneFieldUpdateProcessorFactory extends UpdateRequestProcessorFactory implements SolrCoreAware { - + private final static Logger log = LoggerFactory.getLogger(CloneFieldUpdateProcessorFactory.class); public static final String SOURCE_PARAM = "source"; public static final String DEST_PARAM = "dest"; - + public static final String PATTERN_PARAM = "pattern"; + public static final String REPLACEMENT_PARAM = "replacement"; + private SelectorParams srcInclusions = new SelectorParams(); private Collection srcExclusions = new ArrayList<>(); private FieldNameSelector srcSelector = null; + + /** + * If pattern is null, this this is a literal field name. If pattern is non-null then this + * is a replacement string that may contain meta-characters (ie: capture group identifiers) + * @see #pattern + */ private String dest = null; + /** @see #dest */ + private Pattern pattern = null; protected final FieldNameSelector getSourceSelector() { if (null != srcSelector) return srcSelector; @@ -117,72 +196,198 @@ public class CloneFieldUpdateProcessorFactory @SuppressWarnings("unchecked") @Override public void init(NamedList args) { - Object d = args.remove(DEST_PARAM); - if (null == d) { - throw new SolrException - (SERVER_ERROR, "Init param '" + DEST_PARAM + "' must be specified"); - } else if (! 
(d instanceof CharSequence) ) { - throw new SolrException - (SERVER_ERROR, "Init param '" + DEST_PARAM + "' must be a string (ie: 'str')"); - } - dest = d.toString(); - List sources = args.getAll(SOURCE_PARAM); - if (0 == sources.size()) { - throw new SolrException - (SERVER_ERROR, "Init param '" + SOURCE_PARAM + "' must be specified"); - } - if (1 == sources.size() && sources.get(0) instanceof NamedList) { - // nested set of selector options - NamedList selectorConfig = (NamedList) args.remove(SOURCE_PARAM); - - srcInclusions = parseSelectorParams(selectorConfig); - - List excList = selectorConfig.getAll("exclude"); - - for (Object excObj : excList) { - if (null == excObj) { - throw new SolrException - (SERVER_ERROR, "Init param '" + SOURCE_PARAM + - "' child 'exclude' can not be null"); - } - if (! (excObj instanceof NamedList) ) { - throw new SolrException - (SERVER_ERROR, "Init param '" + SOURCE_PARAM + - "' child 'exclude' must be "); - } - NamedList exc = (NamedList) excObj; - srcExclusions.add(parseSelectorParams(exc)); - if (0 < exc.size()) { - throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM + - "' has unexpected 'exclude' sub-param(s): '" - + selectorConfig.getName(0) + "'"); - } - // call once per instance - selectorConfig.remove("exclude"); - } - - if (0 < selectorConfig.size()) { - throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM + - "' contains unexpected child param(s): '" + - selectorConfig.getName(0) + "'"); - } + // high level (loose) check for which type of config we have. 
+ // + // individual init methods do more strict syntax checking + if (0 <= args.indexOf(SOURCE_PARAM, 0) && 0 <= args.indexOf(DEST_PARAM, 0) ) { + initSourceSelectorSyntax(args); + } else if (0 <= args.indexOf(PATTERN_PARAM, 0) && 0 <= args.indexOf(REPLACEMENT_PARAM, 0)) { + initSimpleRegexReplacement(args); } else { - // source better be one or more strings - srcInclusions.fieldName = new HashSet<>(args.removeConfigArgs("source")); + throw new SolrException(SERVER_ERROR, "A combination of either '" + SOURCE_PARAM + "' + '"+ + DEST_PARAM + "', or '" + REPLACEMENT_PARAM + "' + '" + + PATTERN_PARAM + "' init params are mandatory"); } - - if (0 < args.size()) { - throw new SolrException(SERVER_ERROR, - "Unexpected init param(s): '" + - args.getName(0) + "'"); + throw new SolrException(SERVER_ERROR, + "Unexpected init param(s): '" + + args.getName(0) + "'"); } super.init(args); } + /** + * init helper method that should only be called when we know for certain that both the + * "source" and "dest" init params do not exist. + */ + @SuppressWarnings("unchecked") + private void initSimpleRegexReplacement(NamedList args) { + // The syntactic sugar for the case where there is only one regex pattern for source and the same pattern + // is used for the destination pattern... + // + // pattern != null && replacement != null + // + // ...as top level elements, with no other config options specified + + // if we got here we know we had pattern and replacement, now check for the other two so that we can give a better + // message than "unexpected" + if (0 <= args.indexOf(SOURCE_PARAM, 0) || 0 <= args.indexOf(DEST_PARAM, 0) ) { + throw new SolrException(SERVER_ERROR,"Short hand syntax must not be mixed with full syntax. 
Found " + + PATTERN_PARAM + " and " + REPLACEMENT_PARAM + " but also found " + SOURCE_PARAM + " or " + DEST_PARAM); + } + + assert args.indexOf(SOURCE_PARAM, 0) < 0; + + Object patt = args.remove(PATTERN_PARAM); + Object replacement = args.remove(REPLACEMENT_PARAM); + + if (null == patt || null == replacement) { + throw new SolrException(SERVER_ERROR, "Init params '" + PATTERN_PARAM + "' and '" + + REPLACEMENT_PARAM + "' are both mandatory if '" + SOURCE_PARAM + "' and '"+ + DEST_PARAM + "' are not both specified"); + } + + if (0 != args.size()) { + throw new SolrException(SERVER_ERROR, "Init params '" + REPLACEMENT_PARAM + "' and '" + + PATTERN_PARAM + "' must be children of '" + DEST_PARAM + + "' to be combined with other options."); + } + + if (!(replacement instanceof String)) { + throw new SolrException(SERVER_ERROR, "Init param '" + REPLACEMENT_PARAM + "' must be a string (i.e. )"); + } + if (!(patt instanceof String)) { + throw new SolrException(SERVER_ERROR, "Init param '" + PATTERN_PARAM + "' must be a string (i.e. )"); + } + + dest = replacement.toString(); + try { + this.pattern = Pattern.compile(patt.toString()); + } catch (PatternSyntaxException pe) { + throw new SolrException(SERVER_ERROR, "Init param " + PATTERN_PARAM + + " is not a valid regex pattern: " + patt, pe); + + } + srcInclusions = new SelectorParams(); + srcInclusions.fieldRegex = Collections.singletonList(this.pattern); + } + + /** + * init helper method that should only be called when we know for certain that both the + * "source" and "dest" init params do exist. + */ + @SuppressWarnings("unchecked") + private void initSourceSelectorSyntax(NamedList args) { + // Full and complete syntax where source and dest are mandatory. + // + // source may be a single string or a selector. 
+ // dest may be a single string or list containing pattern and replacement + // + // source != null && dest != null + + // if we got here we know we had source and dest, now check for the other two so that we can give a better + // message than "unexpected" + if (0 <= args.indexOf(PATTERN_PARAM, 0) || 0 <= args.indexOf(REPLACEMENT_PARAM, 0) ) { + throw new SolrException(SERVER_ERROR,"Short hand syntax must not be mixed with full syntax. Found " + + SOURCE_PARAM + " and " + DEST_PARAM + " but also found " + PATTERN_PARAM + " or " + REPLACEMENT_PARAM); + } + + Object d = args.remove(DEST_PARAM); + assert null != d; + + List sources = args.getAll(SOURCE_PARAM); + assert null != sources; + + if (1 == sources.size()) { + if (sources.get(0) instanceof NamedList) { + // nested set of selector options + NamedList selectorConfig = (NamedList) args.remove(SOURCE_PARAM); + + srcInclusions = parseSelectorParams(selectorConfig); + + List excList = selectorConfig.getAll("exclude"); + + for (Object excObj : excList) { + if (null == excObj) { + throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM + + "' child 'exclude' can not be null"); + } + if (!(excObj instanceof NamedList)) { + throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM + + "' child 'exclude' must be "); + } + NamedList exc = (NamedList) excObj; + srcExclusions.add(parseSelectorParams(exc)); + if (0 < exc.size()) { + throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM + + "' has unexpected 'exclude' sub-param(s): '" + + selectorConfig.getName(0) + "'"); + } + // call once per instance + selectorConfig.remove("exclude"); + } + + if (0 < selectorConfig.size()) { + throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM + + "' contains unexpected child param(s): '" + + selectorConfig.getName(0) + "'"); + } + // consume from the named list so it doesn't interfere with subsequent processing + sources.remove(0); + } + } + if (1 <= sources.size()) { + // source 
better be one or more strings + srcInclusions.fieldName = new HashSet<>(args.removeConfigArgs("source")); + } + if (srcInclusions == null) { + throw new SolrException(SERVER_ERROR, "Init params do not specify anything to clone, please supply either " + + SOURCE_PARAM + " and " + DEST_PARAM + " or " + PATTERN_PARAM + " and " + REPLACEMENT_PARAM + ". See javadocs" + + "for CloneFieldUpdateProcessorFactory for further details."); + } + + if (d instanceof NamedList) { + NamedList destList = (NamedList) d; + + Object patt = destList.remove(PATTERN_PARAM); + Object replacement = destList.remove(REPLACEMENT_PARAM); + + if (null == patt || null == replacement) { + throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' children '" + + PATTERN_PARAM + "' and '" + REPLACEMENT_PARAM + + "' are both mandatoryand can not be null"); + } + if (! (patt instanceof String && replacement instanceof String)) { + throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' children '" + + PATTERN_PARAM + "' and '" + REPLACEMENT_PARAM + + "' must both be strings (i.e. )"); + } + if (0 != destList.size()) { + throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' has unexpected children: '" + + destList.getName(0) + "'"); + } + + try { + this.pattern = Pattern.compile(patt.toString()); + } catch (PatternSyntaxException pe) { + throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' child '" + PATTERN_PARAM + + " is not a valid regex pattern: " + patt, pe); + } + dest = replacement.toString(); + + } else if (d instanceof String) { + dest = d.toString(); + } else { + throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' must either be a string " + + "(i.e. ) or a list (i.e. 
) containing '" + + PATTERN_PARAM + "' and '" + REPLACEMENT_PARAM); + } + + } + @Override public void inform(final SolrCore core) { @@ -202,32 +407,58 @@ public class CloneFieldUpdateProcessorFactory public final UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { + final FieldNameSelector srcSelector = getSourceSelector(); return new UpdateRequestProcessor(next) { @Override public void processAdd(AddUpdateCommand cmd) throws IOException { final SolrInputDocument doc = cmd.getSolrInputDocument(); + // destination may be regex replace string, which can cause multiple output fields. + Map destMap = new HashMap<>(); + // preserve initial values and boost (if any) - SolrInputField destField = doc.containsKey(dest) ? - doc.getField(dest) : new SolrInputField(dest); - - boolean modified = false; for (final String fname : doc.getFieldNames()) { if (! srcSelector.shouldMutate(fname)) continue; Collection srcFieldValues = doc.getFieldValues(fname); if(srcFieldValues == null || srcFieldValues.isEmpty()) continue; + + String resolvedDest = dest; + + if (pattern != null) { + Matcher matcher = pattern.matcher(fname); + if (matcher.find()) { + resolvedDest = matcher.replaceAll(dest); + } else { + log.debug("CloneFieldUpdateProcessor.srcSelector.shouldMutate(\"{}\") returned true, " + + "but replacement pattern did not match, field skipped.", fname); + continue; + } + } + SolrInputField destField; + if (doc.containsKey(resolvedDest)) { + destField = doc.getField(resolvedDest); + } else { + SolrInputField targetField = destMap.get(resolvedDest); + if (targetField == null) { + destField = new SolrInputField(resolvedDest); + } else { + destField = targetField; + } + } for (Object val : srcFieldValues) { // preserve existing dest boost (multiplicitive), ignore src boost destField.addValue(val, 1.0f); } - modified=true; + // put it in map to avoid concurrent modification... 
+ destMap.put(resolvedDest, destField); } - if (modified) doc.put(dest, destField); - + for (String dest : destMap.keySet()) { + doc.put(dest, destMap.get(dest)); + } super.processAdd(cmd); } }; diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml index e4f6f8023fd..56402819a7f 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml @@ -292,6 +292,17 @@ dest_s + + + + source1_s + + source\d(_s) + dest$1 + + + + source1_s @@ -300,6 +311,17 @@ + + + source1_s + source2_s + + source\d(_s) + dest$1 + + + + @@ -310,6 +332,19 @@ + + + + source1_s + source2_s + + + source\d(_s) + dest$1 + + + + @@ -321,6 +356,41 @@ dest_s + + + + + source\d_.* + + source0_.* + + + + source\d(_s) + dest$1 + + + + + + + source\d_.* + dest_s + + + + + + + foo.* + + + + x(\d) + y$1 + + + @@ -364,6 +434,16 @@ toField + + + + field(.*) + toField + + + toField + + @@ -380,13 +460,27 @@ - .*_price + .*_price$ list_price all_prices + + + ^feat(.*)s$ + + + ^feat(.*)s$ + key_feat$1 + + + + + ^feat(.*)s$ + best_feat$1 + diff --git a/solr/core/src/test/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactoryTest.java b/solr/core/src/test/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactoryTest.java new file mode 100644 index 00000000000..ac618416228 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactoryTest.java @@ -0,0 +1,377 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; + +import org.apache.solr.common.SolrInputDocument; +import org.junit.BeforeClass; +import org.junit.Test; + +public class CloneFieldUpdateProcessorFactoryTest extends UpdateProcessorTestBase { + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-update-processor-chains.xml", "schema12.xml"); + } + + @Test + public void testSimpleClone() throws Exception { + SolrInputDocument doc = processAdd("clone-single", + doc(f("id", "1"), + f("source1_s", "foo") + )); + assertEquals("source1_s should have stringValue", "foo", doc.getFieldValue("source1_s")); + assertEquals("dest_s should have stringValue", "foo", doc.getFieldValue("dest_s")); + } + + @Test + public void testMultiClone() throws Exception { + SolrInputDocument doc = processAdd("clone-multi", + doc(f("id", "1"), + f("source1_s", "foo"), + f("source2_s", "bar"))); + + assertEquals("source1_s should have stringValue", "foo", doc.getFieldValue("source1_s")); + assertEquals("source2_s should have stringValue", "bar", doc.getFieldValue("source2_s")); + Collection dest_s = doc.getFieldValues("dest_s"); + assertTrue(dest_s.contains("foo")); + assertTrue(dest_s.contains("bar")); + } + + @Test + public void testArrayClone() throws Exception { + SolrInputDocument doc = 
processAdd("clone-array", + doc(f("id", "1"), + f("source1_s", "foo"), + f("source2_s", "bar"))); + + assertEquals("source1_s should have stringValue", "foo", doc.getFieldValue("source1_s")); + assertEquals("source2_s should have stringValue", "bar", doc.getFieldValue("source2_s")); + Collection dest_s = doc.getFieldValues("dest_s"); + assertTrue(dest_s.contains("foo")); + assertTrue(dest_s.contains("bar")); + } + + @Test + public void testSelectorClone() throws Exception { + SolrInputDocument doc = processAdd("clone-selector", + doc(f("id", "1"), + f("source0_s", "nope, not me"), + f("source1_s", "foo"), + f("source2_s", "bar"))); + + assertEquals("source0_s should have stringValue", "nope, not me", doc.getFieldValue("source0_s")); + assertEquals("source1_s should have stringValue", "foo", doc.getFieldValue("source1_s")); + assertEquals("source2_s should have stringValue", "bar", doc.getFieldValue("source2_s")); + Collection dest_s = doc.getFieldValues("dest_s"); + assertTrue(dest_s.contains("foo")); + assertTrue(dest_s.contains("bar")); + assertFalse(dest_s.contains("nope, not me")); + } + + public void testMultipleClones() throws Exception { + SolrInputDocument doc = processAdd("multiple-clones", + doc(f("id", "1"), + f("category", "test"), + f("authors", "author1", "author2"), + f("editors", "ed1", "ed2"), + f("bfriday_price", 4.00), + f("sale_price", 5.00), + f("list_price", 6.00), + f("features", "hill", "valley", "dune"))); + + // the original values should remain + assertEquals("category should have a value", "test", doc.getFieldValue("category")); + + Collection auths = doc.getFieldValues("authors"); + assertTrue(auths.size() == 2); + assertTrue(auths.contains("author1")); + assertTrue(auths.contains("author2")); + Collection eds = doc.getFieldValues("editors"); + assertTrue(eds.size() == 2); + assertTrue(eds.contains("ed1")); + assertTrue(eds.contains("ed2")); + + assertEquals("bfriday_price should have a value", 4.0, doc.getFieldValue("bfriday_price")); 
+ assertEquals("sale_price should have a value", 5.0, doc.getFieldValue("sale_price")); + assertEquals("list_price should have a value", 6.0, doc.getFieldValue("list_price")); + + Collection features = doc.getFieldValues("features"); + assertTrue(features.size() == 3); + assertTrue(features.contains("hill")); + assertTrue(features.contains("valley")); + assertTrue(features.contains("dune")); + + // and the copied values shoul be added + assertEquals("category_s should have a value", "test", doc.getFieldValue("category_s")); + + Collection contribs = doc.getFieldValues("contributors"); + assertTrue(contribs.size() == 4); + assertTrue(contribs.contains("author1")); + assertTrue(contribs.contains("author2")); + assertTrue(contribs.contains("ed1")); + assertTrue(contribs.contains("ed2")); + + Collection prices = doc.getFieldValues("all_prices"); + assertTrue(prices.size() == 2); + assertTrue(prices.contains(5.0)); + assertTrue(prices.contains(4.0)); + assertFalse(prices.contains(6.0)); + + // n.b. 
the field names below imply singularity but that would be achieved with a subsequent + // FirstFieldValueUpdateProcessorFactory (or similar custom class), and not in clone field itself + + Collection keyf = doc.getFieldValues("key_feature"); + assertTrue(keyf.size() == 3); + assertTrue(keyf.contains("hill")); + assertTrue(keyf.contains("valley")); + assertTrue(keyf.contains("dune")); + + Collection bestf = doc.getFieldValues("best_feature"); + assertTrue(bestf.size() == 3); + assertTrue(bestf.contains("hill")); + assertTrue(bestf.contains("valley")); + assertTrue(bestf.contains("dune")); + } + + public void testCloneField() throws Exception { + + SolrInputDocument d; + + // regardless of chain, all of these checks should be equivilent + for (String chain : Arrays.asList("clone-single", "clone-single-regex", + "clone-multi", "clone-multi-regex", + "clone-array", "clone-array-regex", + "clone-selector", "clone-selector-regex")) { + + // simple clone + d = processAdd(chain, + doc(f("id", "1111"), + f("source0_s", "NOT COPIED"), + f("source1_s", "123456789", "", 42, "abcd"))); + assertNotNull(chain, d); + assertEquals(chain, + Arrays.asList("123456789", "", 42, "abcd"), + d.getFieldValues("source1_s")); + assertEquals(chain, + Arrays.asList("123456789", "", 42, "abcd"), + d.getFieldValues("dest_s")); + + // append to existing values, preserve boost + d = processAdd(chain, + doc(f("id", "1111"), + field("dest_s", 2.3f, "orig1", "orig2"), + f("source0_s", "NOT COPIED"), + f("source1_s", "123456789", "", 42, "abcd"))); + assertNotNull(chain, d); + assertEquals(chain, + Arrays.asList("123456789", "", 42, "abcd"), + d.getFieldValues("source1_s")); + assertEquals(chain, + Arrays.asList("orig1", "orig2", "123456789", "", 42, "abcd"), + d.getFieldValues("dest_s")); + assertEquals(chain + ": dest boost changed", + 2.3f, d.getField("dest_s").getBoost(), 0.0f); + } + + // should be equivilent for any chain matching source1_s and source2_s (but not source0_s) + for (String chain : 
Arrays.asList("clone-multi", "clone-multi-regex", + "clone-array", "clone-array-regex", + "clone-selector", "clone-selector-regex")) { + + // simple clone + d = processAdd(chain, + doc(f("id", "1111"), + f("source0_s", "NOT COPIED"), + f("source1_s", "123456789", "", 42, "abcd"), + f("source2_s", "xxx", 999))); + assertNotNull(chain, d); + assertEquals(chain, + Arrays.asList("123456789", "", 42, "abcd"), + d.getFieldValues("source1_s")); + assertEquals(chain, + Arrays.asList("xxx", 999), + d.getFieldValues("source2_s")); + assertEquals(chain, + Arrays.asList("123456789", "", 42, "abcd", "xxx", 999), + d.getFieldValues("dest_s")); + + // append to existing values, preserve boost + d = processAdd(chain, + doc(f("id", "1111"), + field("dest_s", 2.3f, "orig1", "orig2"), + f("source0_s", "NOT COPIED"), + f("source1_s", "123456789", "", 42, "abcd"), + f("source2_s", "xxx", 999))); + assertNotNull(chain, d); + assertEquals(chain, + Arrays.asList("123456789", "", 42, "abcd"), + d.getFieldValues("source1_s")); + assertEquals(chain, + Arrays.asList("xxx", 999), + d.getFieldValues("source2_s")); + assertEquals(chain, + Arrays.asList("orig1", "orig2", + "123456789", "", 42, "abcd", + "xxx", 999), + d.getFieldValues("dest_s")); + assertEquals(chain + ": dest boost changed", + 2.3f, d.getField("dest_s").getBoost(), 0.0f); + } + + // any chain that copies source1_s to dest_s should be equivilent for these assertions + for (String chain : Arrays.asList("clone-simple-regex-syntax", + "clone-single", "clone-single-regex", + "clone-multi", "clone-multi-regex", + "clone-array", "clone-array-regex", + "clone-selector", "clone-selector-regex")) { + + // simple clone + d = processAdd(chain, + doc(f("id", "1111"), + f("source1_s", "123456789", "", 42, "abcd"))); + assertNotNull(chain, d); + assertEquals(chain, + Arrays.asList("123456789", "", 42, "abcd"), + d.getFieldValues("source1_s")); + assertEquals(chain, + Arrays.asList("123456789", "", 42, "abcd"), + d.getFieldValues("dest_s")); + 
+ // append to existing values, preserve boost + d = processAdd(chain, + doc(f("id", "1111"), + field("dest_s", 2.3f, "orig1", "orig2"), + f("source1_s", "123456789", "", 42, "abcd"))); + assertNotNull(chain, d); + assertEquals(chain, + Arrays.asList("123456789", "", 42, "abcd"), + d.getFieldValues("source1_s")); + assertEquals(chain, + Arrays.asList("orig1", "orig2", "123456789", "", 42, "abcd"), + d.getFieldValues("dest_s")); + assertEquals(chain + ": dest boost changed", + 2.3f, d.getField("dest_s").getBoost(), 0.0f); + } + } + + public void testCloneFieldRegexReplaceAll() throws Exception { + SolrInputDocument d = processAdd("clone-regex-replaceall", + doc(f("id", "1111"), + f("foo_x2_s", "123456789", "", 42, "abcd"), + f("foo_x3_x7_s", "xyz"))); + + assertNotNull(d); + assertEquals(Arrays.asList("123456789", "", 42, "abcd"), + d.getFieldValues("foo_y2_s")); + assertEquals("xyz", + d.getFieldValue("foo_y3_y7_s")); + } + + public void testCloneFieldExample() throws Exception { + + SolrInputDocument d; + + // test example from the javadocs + d = processAdd("multiple-clones", + doc(f("id", "1111"), + f("category", "misc"), + f("authors", "Isaac Asimov", "John Brunner"), + f("editors", "John W. Campbell"), + f("store1_price", 87), + f("store2_price", 78), + f("store3_price", (Object) null), + f("list_price", 1000), + f("features", "Pages!", "Binding!"), + f("feat_of_strengths", "Pullups"))); + + assertNotNull(d); + assertEquals("misc", d.getFieldValue("category")); + assertEquals("misc", d.getFieldValue("category_s")); + assertEquals(Arrays.asList("Isaac Asimov", "John Brunner"), + d.getFieldValues("authors")); + assertEquals(Collections.singletonList("John W. Campbell"), + d.getFieldValues("editors")); + assertEquals(Arrays.asList("Isaac Asimov", "John Brunner", + "John W. 
Campbell"), + d.getFieldValues("contributors")); + assertEquals(87, d.getFieldValue("store1_price")); + assertEquals(78, d.getFieldValue("store2_price")); + assertEquals(1000, d.getFieldValue("list_price")); + assertEquals(Arrays.asList(87, 78), + d.getFieldValues("all_prices")); + + assertEquals(Arrays.asList("Pages!", "Binding!"), + d.getFieldValues("key_feature")); + assertEquals("Pullups", d.getFieldValue("key_feat_of_strength")); + } + + public void testCloneCombinations() throws Exception { + + SolrInputDocument d; + + // maxChars + d = processAdd("clone-max-chars", + doc(f("id", "1111"), + f("field1", "text"))); + assertNotNull(d); + assertEquals("text", d.getFieldValue("field1")); + assertEquals("tex", d.getFieldValue("toField")); + + // move + d = processAdd("clone-move", + doc(f("id", "1111"), + f("field1", "text"))); + assertNotNull(d); + assertEquals("text", d.getFieldValue("toField")); + assertFalse(d.containsKey("field1")); + + // replace + d = processAdd("clone-replace", + doc(f("id", "1111"), + f("toField", "IGNORED"), + f("field1", "text"))); + assertNotNull(d); + assertEquals("text", d.getFieldValue("field1")); + assertEquals("text", d.getFieldValue("toField")); + + // append + d = processAdd("clone-append", + doc(f("id", "1111"), + f("toField", "aaa"), + f("field1", "bbb"), + f("field2", "ccc"))); + assertNotNull(d); + assertEquals("bbb", d.getFieldValue("field1")); + assertEquals("ccc", d.getFieldValue("field2")); + assertEquals("aaa; bbb; ccc", d.getFieldValue("toField")); + + // first value + d = processAdd("clone-first", + doc(f("id", "1111"), + f("field0", "aaa"), + f("field1", "bbb"), + f("field2", "ccc"))); + assertNotNull(d); + assertEquals("aaa", d.getFieldValue("toField")); + } + +} diff --git a/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java b/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java index 77d3c049a07..1966c698324 100644 --- 
a/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java +++ b/solr/core/src/test/org/apache/solr/update/processor/FieldMutatingUpdateProcessorTest.java @@ -728,120 +728,6 @@ public class FieldMutatingUpdateProcessorTest extends UpdateProcessorTestBase { } - public void testCloneField() throws Exception { - - SolrInputDocument d = null; - - // regardless of chain, all of these should be equivilent - for (String chain : Arrays.asList("clone-single", "clone-multi", - "clone-array","clone-selector" )) { - - // simple clone - d = processAdd(chain, - doc(f("id", "1111"), - f("source0_s", "NOT COPIED"), - f("source1_s", "123456789", "", 42, "abcd"))); - assertNotNull(chain, d); - assertEquals(chain, - Arrays.asList("123456789", "", 42, "abcd"), - d.getFieldValues("source1_s")); - assertEquals(chain, - Arrays.asList("123456789", "", 42, "abcd"), - d.getFieldValues("dest_s")); - - // append to existing values, preserve boost - d = processAdd(chain, - doc(f("id", "1111"), - field("dest_s", 2.3f, "orig1", "orig2"), - f("source0_s", "NOT COPIED"), - f("source1_s", "123456789", "", 42, "abcd"))); - assertNotNull(chain, d); - assertEquals(chain, - Arrays.asList("123456789", "", 42, "abcd"), - d.getFieldValues("source1_s")); - assertEquals(chain, - Arrays.asList("orig1", "orig2", "123456789", "", 42, "abcd"), - d.getFieldValues("dest_s")); - assertEquals(chain + ": dest boost changed", - 2.3f, d.getField("dest_s").getBoost(), 0.0f); - } - - // should be equivilent for any chain matching source1_s and source2_s - for (String chain : Arrays.asList("clone-multi", - "clone-array","clone-selector" )) { - - // simple clone - d = processAdd(chain, - doc(f("id", "1111"), - f("source0_s", "NOT COPIED"), - f("source1_s", "123456789", "", 42, "abcd"), - f("source2_s", "xxx", 999))); - assertNotNull(chain, d); - assertEquals(chain, - Arrays.asList("123456789", "", 42, "abcd"), - d.getFieldValues("source1_s")); - assertEquals(chain, - Arrays.asList("xxx", 
999), - d.getFieldValues("source2_s")); - assertEquals(chain, - Arrays.asList("123456789", "", 42, "abcd", "xxx", 999), - d.getFieldValues("dest_s")); - - // append to existing values, preserve boost - d = processAdd(chain, - doc(f("id", "1111"), - field("dest_s", 2.3f, "orig1", "orig2"), - f("source0_s", "NOT COPIED"), - f("source1_s", "123456789", "", 42, "abcd"), - f("source2_s", "xxx", 999))); - assertNotNull(chain, d); - assertEquals(chain, - Arrays.asList("123456789", "", 42, "abcd"), - d.getFieldValues("source1_s")); - assertEquals(chain, - Arrays.asList("xxx", 999), - d.getFieldValues("source2_s")); - assertEquals(chain, - Arrays.asList("orig1", "orig2", - "123456789", "", 42, "abcd", - "xxx", 999), - d.getFieldValues("dest_s")); - assertEquals(chain + ": dest boost changed", - 2.3f, d.getField("dest_s").getBoost(), 0.0f); - } - } - - public void testCloneFieldExample() throws Exception { - - SolrInputDocument d = null; - - // test example from the javadocs - d = processAdd("multiple-clones", - doc(f("id", "1111"), - f("category", "misc"), - f("authors", "Isaac Asimov", "John Brunner"), - f("editors", "John W. Campbell"), - f("store1_price", 87), - f("store2_price", 78), - f("store3_price", (Object) null), - f("list_price", 1000))); - assertNotNull(d); - assertEquals("misc",d.getFieldValue("category")); - assertEquals("misc",d.getFieldValue("category_s")); - assertEquals(Arrays.asList("Isaac Asimov", "John Brunner"), - d.getFieldValues("authors")); - assertEquals(Arrays.asList("John W. Campbell"), - d.getFieldValues("editors")); - assertEquals(Arrays.asList("Isaac Asimov", "John Brunner", - "John W. 
Campbell"), - d.getFieldValues("contributors")); - assertEquals(87,d.getFieldValue("store1_price")); - assertEquals(78,d.getFieldValue("store2_price")); - assertEquals(1000,d.getFieldValue("list_price")); - assertEquals(Arrays.asList(87, 78), - d.getFieldValues("all_prices")); - - } public void testCountValues() throws Exception { @@ -893,47 +779,6 @@ public class FieldMutatingUpdateProcessorTest extends UpdateProcessorTestBase { - } - - public void testCloneCombinations() throws Exception { - - SolrInputDocument d = null; - - // maxChars - d = processAdd("clone-max-chars", - doc(f("id", "1111"), - f("field1", "text"))); - assertNotNull(d); - assertEquals("text",d.getFieldValue("field1")); - assertEquals("tex",d.getFieldValue("toField")); - - // move - d = processAdd("clone-move", - doc(f("id", "1111"), - f("field1", "text"))); - assertNotNull(d); - assertEquals("text",d.getFieldValue("toField")); - assertFalse(d.containsKey("field1")); - - // replace - d = processAdd("clone-replace", - doc(f("id", "1111"), - f("toField", "IGNORED"), - f("field1", "text"))); - assertNotNull(d); - assertEquals("text", d.getFieldValue("field1")); - assertEquals("text", d.getFieldValue("toField")); - - // append - d = processAdd("clone-append", - doc(f("id", "1111"), - f("toField", "aaa"), - f("field1", "bbb"), - f("field2", "ccc"))); - assertNotNull(d); - assertEquals("bbb", d.getFieldValue("field1")); - assertEquals("ccc", d.getFieldValue("field2")); - assertEquals("aaa; bbb; ccc", d.getFieldValue("toField")); } public void testConcatDefaults() throws Exception {