SOLR-9657: New TemplateUpdateProcessorFactory added

This commit is contained in:
Noble Paul 2016-10-19 11:04:10 +05:30
parent abbbdc866d
commit 2fc29c1cd5
8 changed files with 244 additions and 61 deletions

View File

@ -56,12 +56,11 @@ public class TemplateTransformer extends Transformer {
public Object transformRow(Map<String, Object> row, Context context) {
VariableResolver resolver = (VariableResolver) context
.getVariableResolver();
VariableResolver resolver = context.getVariableResolver();
// Add current row to the copy of resolver map
// for (Map.Entry<String, Object> entry : row.entrySet())
for (Map<String, String> map : context.getAllEntityFields()) {
map.entrySet();
String expr = map.get(TEMPLATE);
if (expr == null)
continue;

View File

@ -16,16 +16,19 @@
*/
package org.apache.solr.handler.dataimport;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.WeakHashMap;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.solr.update.processor.TemplateUpdateProcessorFactory;
import static org.apache.solr.update.processor.TemplateUpdateProcessorFactory.Resolved;
/**
* <p>
* A set of nested maps that can resolve variables by namespaces. Variables are
@ -48,20 +51,13 @@ import java.util.regex.Pattern;
public class VariableResolver {
private static final Pattern DOT_PATTERN = Pattern.compile("[.]");
private static final Pattern PLACEHOLDER_PATTERN = Pattern
.compile("[$][{](.*?)[}]");
private static final Pattern EVALUATOR_FORMAT_PATTERN = Pattern
.compile("^(\\w*?)\\((.*?)\\)$");
private Map<String,Object> rootNamespace;
private Map<String,Evaluator> evaluators;
private Map<String,Resolved> cache = new WeakHashMap<>();
class Resolved {
List<Integer> startIndexes = new ArrayList<>(2);
List<Integer> endOffsets = new ArrayList<>(2);
List<String> variables = new ArrayList<>(2);
}
private Function<String,Object> fun = this::resolve;
public static final String FUNCTIONS_NAMESPACE = "dataimporter.functions.";
public static final String FUNCTIONS_NAMESPACE_SHORT = "dih.functions.";
@ -145,48 +141,8 @@ public class VariableResolver {
* @return the string with the placeholders replaced with their values
*/
public String replaceTokens(String template) {
if (template == null) {
return null;
}
Resolved r = getResolved(template);
if (r.startIndexes != null) {
StringBuilder sb = new StringBuilder(template);
for (int i = r.startIndexes.size() - 1; i >= 0; i--) {
String replacement = resolve(r.variables.get(i)).toString();
sb.replace(r.startIndexes.get(i), r.endOffsets.get(i), replacement);
}
return sb.toString();
} else {
return template;
}
return TemplateUpdateProcessorFactory.replaceTokens(template, cache, fun);
}
private Resolved getResolved(String template) {
Resolved r = cache.get(template);
if (r == null) {
r = new Resolved();
Matcher m = PLACEHOLDER_PATTERN.matcher(template);
while (m.find()) {
String variable = m.group(1);
r.startIndexes.add(m.start(0));
r.endOffsets.add(m.end(0));
r.variables.add(variable);
}
cache.put(template, r);
}
return r;
}
/**
* Get a list of variables embedded in the template string.
*/
public List<String> getVariables(String template) {
Resolved r = getResolved(template);
if (r == null) {
return Collections.emptyList();
}
return new ArrayList<>(r.variables);
}
public void addNamespace(String name, Map<String,Object> newMap) {
if (newMap != null) {
if (name != null) {
@ -204,7 +160,11 @@ public class VariableResolver {
}
}
}
public List<String> getVariables(String expr) {
return TemplateUpdateProcessorFactory.getVariables(expr, cache);
}
class CurrentLevel {
final Map<String,Object> map;
final int level;

View File

@ -184,7 +184,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
private final long startNanoTime = System.nanoTime();
private final RequestHandlers reqHandlers;
private final PluginBag<SearchComponent> searchComponents = new PluginBag<>(SearchComponent.class, this);
private final PluginBag<UpdateRequestProcessorFactory> updateProcessors = new PluginBag<>(UpdateRequestProcessorFactory.class, this);
private final PluginBag<UpdateRequestProcessorFactory> updateProcessors = new PluginBag<>(UpdateRequestProcessorFactory.class, this, true);
private final Map<String,UpdateRequestProcessorChain> updateProcessorChains;
private final Map<String, SolrInfoMBean> infoRegistry;
private final IndexDeletionPolicyWrapper solrDelPolicy;

View File

@ -18,6 +18,7 @@ package org.apache.solr.update.processor;
import java.io.IOException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
@ -28,18 +29,60 @@ import org.apache.solr.update.AddUpdateCommand;
* This is deliberately made to support only the add operation
*/
public abstract class SimpleUpdateProcessorFactory extends UpdateRequestProcessorFactory {
protected final String myName;
protected NamedList initArgs = new NamedList();
private static ThreadLocal<SolrQueryRequest> REQ = new ThreadLocal<>();
protected SimpleUpdateProcessorFactory() {
String simpleName = this.getClass().getSimpleName();
this.myName = simpleName.substring(0, simpleName.indexOf("UpdateProcessorFactory"));
}
@Override
public void init(NamedList args) {
super.init(args);
this.initArgs = args;
}
@Override
public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
return new UpdateRequestProcessor(next) {
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
process(cmd, req, rsp);
REQ.set(req);
try {
process(cmd, req, rsp);
} finally {
REQ.remove();
}
super.processAdd(cmd);
}
};
}
protected String getParam(String name) {
String[] v = getParams(name);
return v == null || v.length == 0 ? null : v[0];
}
/**returns value from init args or request parameter. the request parameter must have the
* URP shortname prefixed
*/
protected String[] getParams(String name) {
Object v = REQ.get().getParams().getParams(_param(name));
if (v == null) v = initArgs.get(name);
if (v == null) return null;
if (v instanceof String[]) return (String[]) v;
return new String[]{v.toString()};
}
private String _param(String name) {
return myName + "." + name;
}
/**
* This object is reused across requests. So,this method should not store anything in the instance variable.
*/

View File

@ -0,0 +1,110 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
public class TemplateUpdateProcessorFactory extends SimpleUpdateProcessorFactory {
@Override
protected void process(AddUpdateCommand cmd, SolrQueryRequest req, SolrQueryResponse rsp) {
String[] vals = getParams("field");
SolrInputDocument doc = cmd.getSolrInputDocument();
if (vals != null && vals.length > 0) {
for (String val : vals) {
if (val == null || val.isEmpty()) continue;
int idx = val.indexOf(':');
if (idx == -1)
throw new RuntimeException("'field' must be of the format <field-name>:<the-template-string>");
String fName = val.substring(0, idx);
String template = val.substring(idx + 1);
doc.addField(fName, replaceTokens(template, null, s -> {
Object v = doc.getFieldValue(s);
return v == null ? "" : v;
}));
}
}
}
public static Resolved getResolved(String template, Map<String, Resolved> cache) {
Resolved r = cache == null ? null : cache.get(template);
if (r == null) {
r = new Resolved();
Matcher m = PLACEHOLDER_PATTERN.matcher(template);
while (m.find()) {
String variable = m.group(1);
r.startIndexes.add(m.start(0));
r.endOffsets.add(m.end(0));
r.variables.add(variable);
}
if (cache != null) cache.put(template, r);
}
return r;
}
/**
* Get a list of variables embedded in the template string.
*/
public static List<String> getVariables(String template, Map<String, Resolved> cache) {
Resolved r = getResolved(template, cache);
if (r == null) {
return Collections.emptyList();
}
return new ArrayList<>(r.variables);
}
public static String replaceTokens(String template, Map<String, Resolved> cache, Function<String, Object> fun) {
if (template == null) {
return null;
}
Resolved r = getResolved(template, cache);
if (r.startIndexes != null) {
StringBuilder sb = new StringBuilder(template);
for (int i = r.startIndexes.size() - 1; i >= 0; i--) {
String replacement = fun.apply(r.variables.get(i)).toString();
sb.replace(r.startIndexes.get(i), r.endOffsets.get(i), replacement);
}
return sb.toString();
} else {
return template;
}
}
public static class Resolved {
public List<Integer> startIndexes = new ArrayList<>(2);
public List<Integer> endOffsets = new ArrayList<>(2);
public List<String> variables = new ArrayList<>(2);
}
public static final Pattern PLACEHOLDER_PATTERN = Pattern
.compile("[$][{](.*?)[}]");
}

View File

@ -101,7 +101,7 @@ public final class UpdateRequestProcessorChain implements PluginInfoInitialized
/**
* Initializes the chain using the factories specified by the <code>PluginInfo</code>.
* if the chain includes the <code>RunUpdateProcessorFactory</code>, but
* if the chain includes the <code>RunUpdateProcessorFactory</code>, but
* does not include an implementation of the
* <code>DistributingUpdateProcessorFactory</code> interface, then an
* instance of <code>DistributedUpdateProcessorFactory</code> will be
@ -269,8 +269,16 @@ public final class UpdateRequestProcessorChain implements PluginInfoInitialized
s = s.trim();
if (s.isEmpty()) continue;
UpdateRequestProcessorFactory p = core.getUpdateProcessors().get(s);
if (p == null)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No such processor " + s);
if (p == null) {
try {
p = core.createInstance(s + "UpdateProcessorFactory", UpdateRequestProcessorFactory.class,
"updateProcessor", null, core.getMemClassLoader());
core.getUpdateProcessors().put(s, p);
} catch (SolrException e) {
}
if (p == null)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No such processor " + s);
}
result.add(p);
}
return result;

View File

@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
public class TemplateUpdateProcessorTest extends SolrTestCaseJ4 {
public void testSimple() throws Exception {
AddUpdateCommand cmd = new AddUpdateCommand(new LocalSolrQueryRequest(null,
new ModifiableSolrParams()
.add("processor", "Template")
.add("Template.field", "id:${firstName}_${lastName}")
.add("Template.field", "another:${lastName}_${firstName}")
.add("Template.field", "missing:${lastName}_${unKnown}")
));
cmd.solrDoc = new SolrInputDocument();
cmd.solrDoc.addField("firstName", "Tom");
cmd.solrDoc.addField("lastName", "Cruise");
new TemplateUpdateProcessorFactory().getInstance(cmd.getReq(), new SolrQueryResponse(), null).processAdd(cmd);
assertEquals("Tom_Cruise", cmd.solrDoc.getFieldValue("id"));
assertEquals("Cruise_Tom", cmd.solrDoc.getFieldValue("another"));
assertEquals("Cruise_", cmd.solrDoc.getFieldValue("missing"));
}
}

View File

@ -23,6 +23,7 @@ import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.util.AbstractSolrTestCase;
@ -41,6 +42,20 @@ public class UpdateRequestProcessorFactoryTest extends AbstractSolrTestCase {
public static void beforeClass() throws Exception {
initCore("solrconfig-transformers.xml", "schema.xml");
}
public void testRequestTimeUrp(){
SolrCore core = h.getCore();
ModifiableSolrParams params = new ModifiableSolrParams()
.add("processor", "Template")
.add("Template.field", "id_t:${firstName}_${lastName}")
.add("Template.field", "another_t:${lastName}_${firstName}")
.add("Template.field", "missing_t:${lastName}_${unKnown}");
UpdateRequestProcessorChain chain = core.getUpdateProcessorChain(params);
List<UpdateRequestProcessorFactory> l = chain.getProcessors();
assertTrue(l.get(0) instanceof TemplateUpdateProcessorFactory);
}
public void testConfiguration() throws Exception
{