From 2fc29c1cd5320322a434ca7f820b69d80536aa5a Mon Sep 17 00:00:00 2001 From: Noble Paul Date: Wed, 19 Oct 2016 11:04:10 +0530 Subject: [PATCH] SOLR-9657: New TemplateUpdateProcessorFactory added --- .../dataimport/TemplateTransformer.java | 5 +- .../handler/dataimport/VariableResolver.java | 66 +++-------- .../java/org/apache/solr/core/SolrCore.java | 2 +- .../SimpleUpdateProcessorFactory.java | 45 ++++++- .../TemplateUpdateProcessorFactory.java | 110 ++++++++++++++++++ .../UpdateRequestProcessorChain.java | 14 ++- .../TemplateUpdateProcessorTest.java | 48 ++++++++ .../UpdateRequestProcessorFactoryTest.java | 15 +++ 8 files changed, 244 insertions(+), 61 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/update/processor/TemplateUpdateProcessorFactory.java create mode 100644 solr/core/src/test/org/apache/solr/update/processor/TemplateUpdateProcessorTest.java diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/TemplateTransformer.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/TemplateTransformer.java index 6bd60500aee..a5faa7e97b4 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/TemplateTransformer.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/TemplateTransformer.java @@ -56,12 +56,11 @@ public class TemplateTransformer extends Transformer { public Object transformRow(Map row, Context context) { - VariableResolver resolver = (VariableResolver) context - .getVariableResolver(); + VariableResolver resolver = context.getVariableResolver(); // Add current row to the copy of resolver map -// for (Map.Entry entry : row.entrySet()) for (Map map : context.getAllEntityFields()) { + map.entrySet(); String expr = map.get(TEMPLATE); if (expr == null) continue; diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/VariableResolver.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/VariableResolver.java index 76930e2309d..f255657baa6 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/VariableResolver.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/VariableResolver.java @@ -16,16 +16,19 @@ */ package org.apache.solr.handler.dataimport; -import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.WeakHashMap; +import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.solr.update.processor.TemplateUpdateProcessorFactory; + +import static org.apache.solr.update.processor.TemplateUpdateProcessorFactory.Resolved; + /** *

* A set of nested maps that can resolve variables by namespaces. Variables are @@ -48,20 +51,13 @@ import java.util.regex.Pattern; public class VariableResolver { private static final Pattern DOT_PATTERN = Pattern.compile("[.]"); - private static final Pattern PLACEHOLDER_PATTERN = Pattern - .compile("[$][{](.*?)[}]"); private static final Pattern EVALUATOR_FORMAT_PATTERN = Pattern .compile("^(\\w*?)\\((.*?)\\)$"); private Map rootNamespace; private Map evaluators; private Map cache = new WeakHashMap<>(); - - class Resolved { - List startIndexes = new ArrayList<>(2); - List endOffsets = new ArrayList<>(2); - List variables = new ArrayList<>(2); - } - + private Function fun = this::resolve; + public static final String FUNCTIONS_NAMESPACE = "dataimporter.functions."; public static final String FUNCTIONS_NAMESPACE_SHORT = "dih.functions."; @@ -145,48 +141,8 @@ public class VariableResolver { * @return the string with the placeholders replaced with their values */ public String replaceTokens(String template) { - if (template == null) { - return null; - } - Resolved r = getResolved(template); - if (r.startIndexes != null) { - StringBuilder sb = new StringBuilder(template); - for (int i = r.startIndexes.size() - 1; i >= 0; i--) { - String replacement = resolve(r.variables.get(i)).toString(); - sb.replace(r.startIndexes.get(i), r.endOffsets.get(i), replacement); - } - return sb.toString(); - } else { - return template; - } + return TemplateUpdateProcessorFactory.replaceTokens(template, cache, fun); } - - private Resolved getResolved(String template) { - Resolved r = cache.get(template); - if (r == null) { - r = new Resolved(); - Matcher m = PLACEHOLDER_PATTERN.matcher(template); - while (m.find()) { - String variable = m.group(1); - r.startIndexes.add(m.start(0)); - r.endOffsets.add(m.end(0)); - r.variables.add(variable); - } - cache.put(template, r); - } - return r; - } - /** - * Get a list of variables embedded in the template string. - */ - public List getVariables(String template) { - Resolved r = getResolved(template); - if (r == null) { - return Collections.emptyList(); - } - return new ArrayList<>(r.variables); - } - public void addNamespace(String name, Map newMap) { if (newMap != null) { if (name != null) { @@ -204,7 +160,11 @@ public class VariableResolver { } } } - + + public List getVariables(String expr) { + return TemplateUpdateProcessorFactory.getVariables(expr, cache); + } + class CurrentLevel { final Map map; final int level; diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index 8ec703fc717..b1021f7678e 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -184,7 +184,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable { private final long startNanoTime = System.nanoTime(); private final RequestHandlers reqHandlers; private final PluginBag searchComponents = new PluginBag<>(SearchComponent.class, this); - private final PluginBag updateProcessors = new PluginBag<>(UpdateRequestProcessorFactory.class, this); + private final PluginBag updateProcessors = new PluginBag<>(UpdateRequestProcessorFactory.class, this, true); private final Map updateProcessorChains; private final Map infoRegistry; private final IndexDeletionPolicyWrapper solrDelPolicy; diff --git a/solr/core/src/java/org/apache/solr/update/processor/SimpleUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/SimpleUpdateProcessorFactory.java index e9c5b2d83c6..aec9d8756cc 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/SimpleUpdateProcessorFactory.java +++ b/solr/core/src/java/org/apache/solr/update/processor/SimpleUpdateProcessorFactory.java @@ -18,6 +18,7 @@ package org.apache.solr.update.processor; import java.io.IOException; +import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.AddUpdateCommand; @@ -28,18 +29,60 @@ import org.apache.solr.update.AddUpdateCommand; * This is deliberately made to support only the add operation */ public abstract class SimpleUpdateProcessorFactory extends UpdateRequestProcessorFactory { + protected final String myName; + protected NamedList initArgs = new NamedList(); + private static ThreadLocal REQ = new ThreadLocal<>(); + + protected SimpleUpdateProcessorFactory() { + String simpleName = this.getClass().getSimpleName(); + this.myName = simpleName.substring(0, simpleName.indexOf("UpdateProcessorFactory")); + } + + @Override + public void init(NamedList args) { + super.init(args); + this.initArgs = args; + + } @Override public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { return new UpdateRequestProcessor(next) { @Override public void processAdd(AddUpdateCommand cmd) throws IOException { - process(cmd, req, rsp); + REQ.set(req); + try { + process(cmd, req, rsp); + } finally { + REQ.remove(); + } super.processAdd(cmd); } }; } + protected String getParam(String name) { + String[] v = getParams(name); + return v == null || v.length == 0 ? null : v[0]; + } + + /**returns value from init args or request parameter. the request parameter must have the + * URP shortname prefixed + */ + protected String[] getParams(String name) { + Object v = REQ.get().getParams().getParams(_param(name)); + if (v == null) v = initArgs.get(name); + if (v == null) return null; + if (v instanceof String[]) return (String[]) v; + return new String[]{v.toString()}; + + } + + private String _param(String name) { + return myName + "." + name; + } + + /** * This object is reused across requests. So,this method should not store anything in the instance variable. */ diff --git a/solr/core/src/java/org/apache/solr/update/processor/TemplateUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/TemplateUpdateProcessorFactory.java new file mode 100644 index 00000000000..41d109bb79c --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/TemplateUpdateProcessorFactory.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.update.AddUpdateCommand; + +public class TemplateUpdateProcessorFactory extends SimpleUpdateProcessorFactory { + @Override + protected void process(AddUpdateCommand cmd, SolrQueryRequest req, SolrQueryResponse rsp) { + String[] vals = getParams("field"); + SolrInputDocument doc = cmd.getSolrInputDocument(); + if (vals != null && vals.length > 0) { + for (String val : vals) { + if (val == null || val.isEmpty()) continue; + int idx = val.indexOf(':'); + if (idx == -1) + throw new RuntimeException("'field' must be of the format :"); + + String fName = val.substring(0, idx); + String template = val.substring(idx + 1); + doc.addField(fName, replaceTokens(template, null, s -> { + Object v = doc.getFieldValue(s); + return v == null ? "" : v; + })); + } + } + + } + + + public static Resolved getResolved(String template, Map cache) { + Resolved r = cache == null ? null : cache.get(template); + if (r == null) { + r = new Resolved(); + Matcher m = PLACEHOLDER_PATTERN.matcher(template); + while (m.find()) { + String variable = m.group(1); + r.startIndexes.add(m.start(0)); + r.endOffsets.add(m.end(0)); + r.variables.add(variable); + } + if (cache != null) cache.put(template, r); + } + return r; + } + + /** + * Get a list of variables embedded in the template string. + */ + public static List getVariables(String template, Map cache) { + Resolved r = getResolved(template, cache); + if (r == null) { + return Collections.emptyList(); + } + return new ArrayList<>(r.variables); + } + + public static String replaceTokens(String template, Map cache, Function fun) { + if (template == null) { + return null; + } + Resolved r = getResolved(template, cache); + if (r.startIndexes != null) { + StringBuilder sb = new StringBuilder(template); + for (int i = r.startIndexes.size() - 1; i >= 0; i--) { + String replacement = fun.apply(r.variables.get(i)).toString(); + sb.replace(r.startIndexes.get(i), r.endOffsets.get(i), replacement); + } + return sb.toString(); + } else { + return template; + } + } + + + public static class Resolved { + public List startIndexes = new ArrayList<>(2); + public List endOffsets = new ArrayList<>(2); + public List variables = new ArrayList<>(2); + } + + public static final Pattern PLACEHOLDER_PATTERN = Pattern + .compile("[$][{](.*?)[}]"); +} diff --git a/solr/core/src/java/org/apache/solr/update/processor/UpdateRequestProcessorChain.java b/solr/core/src/java/org/apache/solr/update/processor/UpdateRequestProcessorChain.java index e77dd82fa59..0ed626cb0fc 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/UpdateRequestProcessorChain.java +++ b/solr/core/src/java/org/apache/solr/update/processor/UpdateRequestProcessorChain.java @@ -101,7 +101,7 @@ public final class UpdateRequestProcessorChain implements PluginInfoInitialized /** * Initializes the chain using the factories specified by the PluginInfo. - * if the chain includes the RunUpdateProcessorFactory, but + * if the chain includes the RunUpdateProcessorFactory, but * does not include an implementation of the * DistributingUpdateProcessorFactory interface, then an * instance of DistributedUpdateProcessorFactory will be @@ -269,8 +269,16 @@ public final class UpdateRequestProcessorChain implements PluginInfoInitialized s = s.trim(); if (s.isEmpty()) continue; UpdateRequestProcessorFactory p = core.getUpdateProcessors().get(s); - if (p == null) - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No such processor " + s); + if (p == null) { + try { + p = core.createInstance(s + "UpdateProcessorFactory", UpdateRequestProcessorFactory.class, + "updateProcessor", null, core.getMemClassLoader()); + core.getUpdateProcessors().put(s, p); + } catch (SolrException e) { + } + if (p == null) + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No such processor " + s); + } result.add(p); } return result; diff --git a/solr/core/src/test/org/apache/solr/update/processor/TemplateUpdateProcessorTest.java b/solr/core/src/test/org/apache/solr/update/processor/TemplateUpdateProcessorTest.java new file mode 100644 index 00000000000..7ee8a34938b --- /dev/null +++ b/solr/core/src/test/org/apache/solr/update/processor/TemplateUpdateProcessorTest.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.request.LocalSolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.update.AddUpdateCommand; + +public class TemplateUpdateProcessorTest extends SolrTestCaseJ4 { + public void testSimple() throws Exception { + + AddUpdateCommand cmd = new AddUpdateCommand(new LocalSolrQueryRequest(null, + new ModifiableSolrParams() + .add("processor", "Template") + .add("Template.field", "id:${firstName}_${lastName}") + .add("Template.field", "another:${lastName}_${firstName}") + .add("Template.field", "missing:${lastName}_${unKnown}") + + )); + cmd.solrDoc = new SolrInputDocument(); + cmd.solrDoc.addField("firstName", "Tom"); + cmd.solrDoc.addField("lastName", "Cruise"); + + new TemplateUpdateProcessorFactory().getInstance(cmd.getReq(), new SolrQueryResponse(), null).processAdd(cmd); + assertEquals("Tom_Cruise", cmd.solrDoc.getFieldValue("id")); + assertEquals("Cruise_Tom", cmd.solrDoc.getFieldValue("another")); + assertEquals("Cruise_", cmd.solrDoc.getFieldValue("missing")); + + } +} diff --git a/solr/core/src/test/org/apache/solr/update/processor/UpdateRequestProcessorFactoryTest.java b/solr/core/src/test/org/apache/solr/update/processor/UpdateRequestProcessorFactoryTest.java index e9dc93f282e..7ebefeca41e 100644 --- a/solr/core/src/test/org/apache/solr/update/processor/UpdateRequestProcessorFactoryTest.java +++ b/solr/core/src/test/org/apache/solr/update/processor/UpdateRequestProcessorFactoryTest.java @@ -23,6 +23,7 @@ import java.util.Arrays; import java.util.ArrayList; import java.util.List; +import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.core.SolrCore; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.util.AbstractSolrTestCase; @@ -41,6 +42,20 @@ public class UpdateRequestProcessorFactoryTest extends AbstractSolrTestCase { public static void beforeClass() throws Exception { initCore("solrconfig-transformers.xml", "schema.xml"); } + + public void testRequestTimeUrp(){ + SolrCore core = h.getCore(); + ModifiableSolrParams params = new ModifiableSolrParams() + .add("processor", "Template") + .add("Template.field", "id_t:${firstName}_${lastName}") + .add("Template.field", "another_t:${lastName}_${firstName}") + .add("Template.field", "missing_t:${lastName}_${unKnown}"); + UpdateRequestProcessorChain chain = core.getUpdateProcessorChain(params); + List l = chain.getProcessors(); + assertTrue(l.get(0) instanceof TemplateUpdateProcessorFactory); + + + } public void testConfiguration() throws Exception {