SOLR-6020: Auto-generate a unique key in schema-less example if data does not have an id field

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1614416 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shalin Shekhar Mangar 2014-07-29 16:46:26 +00:00
parent 858e309c7b
commit 73503c4f66
6 changed files with 217 additions and 16 deletions

View File

@ -161,6 +161,11 @@ New Features
* SOLR-6267: Let user override Interval Faceting key with LocalParams (Tomas Fernandez_Lobbe
via Erick Erickson)
* SOLR-6020: Auto-generate a unique key in schema-less example if data does not have an id field.
The UUIDUpdateProcessor was improved to not require a field name in configuration and generate
a UUID into the unique Key field.
(Vitaliy Zhovtyuk, hossman, Steve Rowe, Erik Hatcher, shalin)
Bug Fixes
----------------------

View File

@ -72,7 +72,7 @@ public abstract class AbstractDefaultValueUpdateProcessorFactory
* to any document which does not already have a value in
* <code>fieldName</code>
*/
protected static abstract class DefaultValueUpdateProcessor
static abstract class DefaultValueUpdateProcessor
extends UpdateRequestProcessor {
final String fieldName;

View File

@ -17,29 +17,28 @@
package org.apache.solr.update.processor;
import java.io.IOException;
import java.util.UUID;
import java.util.Locale;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.common.SolrException;
import static org.apache.solr.common.SolrException.ErrorCode.*;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.schema.SchemaField;
/**
* <p>
* An update processor that adds a newly generated <code>UUID</code> value
* to any document being added that does not already have a value in the
* An update processor that adds a newly generated <code>UUID</code> value
* to any document being added that does not already have a value in the
* specified field.
* </p>
*
* <p>
* In the example configuration below, if a document does not contain a value
* in the <code>id</code> field, a new <code>UUID</code> will be generated
* In the example configuration below, if a document does not contain a value
* in the <code>id</code> field, a new <code>UUID</code> will be generated
* and added as the value of that field.
* <p>
*
@ -48,19 +47,46 @@ import org.apache.solr.update.AddUpdateCommand;
* &lt;str name="fieldName"&gt;id&lt;/str&gt;
* &lt;/processor&gt;
* </pre>
*
*
* <p>
* If field name is omitted in processor configuration,
* then @{link org.apache.solr.schema.IndexSchema#getUniqueKeyField()}</code>
* is used as field and a new <code>UUID</code> will be generated
* and added as the value of that field. The field type of the uniqueKeyField
* must be anything which accepts a string or UUID value.
* <p>
* @see UUID
*/
public class UUIDUpdateProcessorFactory
extends AbstractDefaultValueUpdateProcessorFactory {
public class UUIDUpdateProcessorFactory extends UpdateRequestProcessorFactory {
@Override
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
SolrQueryResponse rsp,
protected String fieldName = null;
@SuppressWarnings("unchecked")
public void init(NamedList args) {
Object obj = args.remove("fieldName");
if (null != obj) {
fieldName = obj.toString();
}
if (0 < args.size()) {
throw new SolrException(SERVER_ERROR,
"Unexpected init param(s): '" +
args.getName(0) + "'");
}
}
public UpdateRequestProcessor getInstance(SolrQueryRequest req,
SolrQueryResponse rsp,
UpdateRequestProcessor next ) {
return new DefaultValueUpdateProcessor(fieldName, next) {
if (StringUtils.isEmpty(fieldName)) {
SchemaField schemaField = req.getSchema().getUniqueKeyField();
fieldName = schemaField.getName();
}
return new AbstractDefaultValueUpdateProcessorFactory.DefaultValueUpdateProcessor(fieldName, next) {
@Override
public Object getDefaultValue() {
public Object getDefaultValue() {
return UUID.randomUUID().toString().toLowerCase(Locale.ROOT);
}
};

View File

@ -468,6 +468,18 @@
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="default-values-fallback-to-unique">
<processor class="solr.UUIDUpdateProcessorFactory">
<str name="fieldName">id</str>
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="default-values-fallback-to-unique-automatically">
<processor class="solr.UUIDUpdateProcessorFactory">
</processor>
</updateRequestProcessorChain>
<updateRequestProcessorChain name="uniq-values">
<processor class="solr.UniqFieldsUpdateProcessorFactory">
<str name="fieldRegex">uniq_.*</str>

View File

@ -0,0 +1,155 @@
package org.apache.solr.update.processor;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;
import org.junit.BeforeClass;
import java.io.IOException;
import java.util.Date;
import java.util.UUID;
public class UUIDUpdateProcessorFallbackTest extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-update-processor-chains.xml", "schema.xml");
}
public void testFallbackToUnique() throws Exception {
Date now = new Date();
// get all defaults
SolrInputDocument d = processAdd("default-values-fallback-to-unique",
doc(f("name", "Existing", "Values")));
assertNotNull(d);
assertNotNull(d.getFieldValue("id"));
assertNotNull(UUID.fromString(d.getFieldValue("id").toString()));
// get all defaults
d = processAdd("default-values-fallback-to-unique-automatically",
doc(f("name", "Existing", "Values")));
assertNotNull(d);
assertNotNull(d.getFieldValue("id"));
assertNotNull(UUID.fromString(d.getFieldValue("id").toString()));
// defaults already specified
d = processAdd("default-values-fallback-to-unique",
doc(f("timestamp", now),
f("id", "550e8400-e29b-41d4-a716-446655440000"),
f("processor_default_s", "I HAVE A VALUE"),
f("processor_default_i", 12345),
f("name", "Existing", "Values")));
assertNotNull(d);
assertEquals("550e8400-e29b-41d4-a716-446655440000",
d.getFieldValue("id"));
// defaults already specified
d = processAdd("default-values-fallback-to-unique-automatically",
doc(f("timestamp", now),
f("id", "550e8400-e29b-41d4-a716-446655440000"),
f("processor_default_s", "I HAVE A VALUE"),
f("processor_default_i", 121),
f("name", "Existing", "Values")));
assertNotNull(d);
assertEquals("550e8400-e29b-41d4-a716-446655440000",
d.getFieldValue("id"));
assertEquals(121, d.getFieldValue("processor_default_i"));
}
/**
* Convenience method for building up SolrInputDocuments
*/
SolrInputDocument doc(SolrInputField... fields) {
SolrInputDocument d = new SolrInputDocument();
for (SolrInputField f : fields) {
d.put(f.getName(), f);
}
return d;
}
/**
* Convenience method for building up SolrInputFields
*/
SolrInputField field(String name, float boost, Object... values) {
SolrInputField f = new SolrInputField(name);
for (Object v : values) {
f.addValue(v, 1.0F);
}
f.setBoost(boost);
return f;
}
/**
* Convenience method for building up SolrInputFields with default boost
*/
SolrInputField f(String name, Object... values) {
return field(name, 1.0F, values);
}
/**
* Runs a document through the specified chain, and returns the final
* document used when the chain is completed (NOTE: some chains may
* modify the document in place
*/
SolrInputDocument processAdd(final String chain,
final SolrInputDocument docIn)
throws IOException {
SolrCore core = h.getCore();
UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
assertNotNull("No Chain named: " + chain, pc);
SolrQueryResponse rsp = new SolrQueryResponse();
SolrQueryRequest req = new LocalSolrQueryRequest
(core, new ModifiableSolrParams());
try {
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req,rsp));
AddUpdateCommand cmd = new AddUpdateCommand(req);
cmd.solrDoc = docIn;
UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
processor.processAdd(cmd);
return cmd.solrDoc;
} finally {
SolrRequestInfo.clearRequestInfo();
req.close();
}
}
}

View File

@ -1549,6 +1549,9 @@
See http://wiki.apache.org/solr/GuessingFieldTypes
-->
<updateRequestProcessorChain name="add-unknown-fields-to-the-schema">
<!-- UUIDUpdateProcessorFactory will generate an id if none is present in the incoming document -->
<processor class="solr.UUIDUpdateProcessorFactory" />
<processor class="solr.LogUpdateProcessorFactory"/>
<processor class="solr.DistributedUpdateProcessorFactory"/>
<processor class="solr.RemoveBlankFieldUpdateProcessorFactory"/>