mirror of https://github.com/apache/lucene.git
SOLR-12441: New NestedUpdateProcessorFactory
This commit is contained in:
parent
14e132b29f
commit
fe180bbd59
|
@ -103,6 +103,9 @@ New Features
|
|||
|
||||
* SOLR-12495: An #EQUALS function for replica in autoscaling policy to equally distribute replicas (noble)
|
||||
|
||||
* SOLR-12441: New NestedUpdateProcessorFactory (URP) to populate special fields _NEST_PARENT_ and _NEST_PATH_ of nested
|
||||
(child) documents. It will also generate a uniqueKey for nested docs that do not have one. (Moshe Bla, David Smiley)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -107,6 +107,8 @@ public class IndexSchema {
|
|||
public static final String LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";
|
||||
public static final String MAX_CHARS = "maxChars";
|
||||
public static final String NAME = "name";
|
||||
public static final String NEST_PARENT_FIELD_NAME = "_NEST_PARENT_";
|
||||
public static final String NEST_PATH_FIELD_NAME = "_NEST_PATH_";
|
||||
public static final String REQUIRED = "required";
|
||||
public static final String SCHEMA = "schema";
|
||||
public static final String SIMILARITY = "similarity";
|
||||
|
|
|
@ -0,0 +1,137 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update.processor;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.update.AddUpdateCommand;
|
||||
|
||||
/**
|
||||
* Adds fields to nested documents to support some nested search requirements.
|
||||
* It can even generate uniqueKey fields for nested docs.
|
||||
*
|
||||
* @see IndexSchema#NEST_PARENT_FIELD_NAME
|
||||
* @see IndexSchema#NEST_PATH_FIELD_NAME
|
||||
*
|
||||
* @since 7.5.0
|
||||
*/
|
||||
public class NestedUpdateProcessorFactory extends UpdateRequestProcessorFactory {
|
||||
|
||||
public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next ) {
|
||||
boolean storeParent = shouldStoreDocParent(req.getSchema());
|
||||
boolean storePath = shouldStoreDocPath(req.getSchema());
|
||||
if(!(storeParent || storePath)) {
|
||||
return next;
|
||||
}
|
||||
return new NestedUpdateProcessor(req, shouldStoreDocParent(req.getSchema()), shouldStoreDocPath(req.getSchema()), next);
|
||||
}
|
||||
|
||||
private static boolean shouldStoreDocParent(IndexSchema schema) {
|
||||
return schema.getFields().containsKey(IndexSchema.NEST_PARENT_FIELD_NAME);
|
||||
}
|
||||
|
||||
private static boolean shouldStoreDocPath(IndexSchema schema) {
|
||||
return schema.getFields().containsKey(IndexSchema.NEST_PATH_FIELD_NAME);
|
||||
}
|
||||
|
||||
private static class NestedUpdateProcessor extends UpdateRequestProcessor {
|
||||
private static final String PATH_SEP_CHAR = "/";
|
||||
private static final String NUM_SEP_CHAR = "#";
|
||||
private static final String SINGULAR_VALUE_CHAR = "";
|
||||
private boolean storePath;
|
||||
private boolean storeParent;
|
||||
private String uniqueKeyFieldName;
|
||||
|
||||
|
||||
NestedUpdateProcessor(SolrQueryRequest req, boolean storeParent, boolean storePath, UpdateRequestProcessor next) {
|
||||
super(next);
|
||||
this.storeParent = storeParent;
|
||||
this.storePath = storePath;
|
||||
this.uniqueKeyFieldName = req.getSchema().getUniqueKeyField().getName();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processAdd(AddUpdateCommand cmd) throws IOException {
|
||||
SolrInputDocument doc = cmd.getSolrInputDocument();
|
||||
processDocChildren(doc, null);
|
||||
super.processAdd(cmd);
|
||||
}
|
||||
|
||||
private void processDocChildren(SolrInputDocument doc, String fullPath) {
|
||||
for(SolrInputField field: doc.values()) {
|
||||
int childNum = 0;
|
||||
boolean isSingleVal = !(field.getValue() instanceof Collection);
|
||||
for(Object val: field) {
|
||||
if(!(val instanceof SolrInputDocument)) {
|
||||
// either all collection items are child docs or none are.
|
||||
break;
|
||||
}
|
||||
final String fieldName = field.getName();
|
||||
|
||||
if(fieldName.contains(PATH_SEP_CHAR)) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field name: '" + fieldName
|
||||
+ "' contains: '" + PATH_SEP_CHAR + "' , which is reserved for the nested URP");
|
||||
}
|
||||
final String sChildNum = isSingleVal ? SINGULAR_VALUE_CHAR : String.valueOf(childNum);
|
||||
SolrInputDocument cDoc = (SolrInputDocument) val;
|
||||
if(!cDoc.containsKey(uniqueKeyFieldName)) {
|
||||
String parentDocId = doc.getField(uniqueKeyFieldName).getFirstValue().toString();
|
||||
cDoc.setField(uniqueKeyFieldName, generateChildUniqueId(parentDocId, fieldName, sChildNum));
|
||||
}
|
||||
final String lastKeyPath = fieldName + NUM_SEP_CHAR + sChildNum;
|
||||
// concat of all paths children.grandChild => children#1/grandChild#
|
||||
final String childDocPath = fullPath == null ? lastKeyPath : fullPath + PATH_SEP_CHAR + lastKeyPath;
|
||||
processChildDoc((SolrInputDocument) val, doc, childDocPath);
|
||||
++childNum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void processChildDoc(SolrInputDocument sdoc, SolrInputDocument parent, String fullPath) {
|
||||
if(storePath) {
|
||||
setPathField(sdoc, fullPath);
|
||||
}
|
||||
if (storeParent) {
|
||||
setParentKey(sdoc, parent);
|
||||
}
|
||||
processDocChildren(sdoc, fullPath);
|
||||
}
|
||||
|
||||
private String generateChildUniqueId(String parentId, String childKey, String childNum) {
|
||||
// combines parentId with the child's key and childNum. e.g. "10/footnote#1"
|
||||
return parentId + PATH_SEP_CHAR + childKey + NUM_SEP_CHAR + childNum;
|
||||
}
|
||||
|
||||
private void setParentKey(SolrInputDocument sdoc, SolrInputDocument parent) {
|
||||
sdoc.setField(IndexSchema.NEST_PARENT_FIELD_NAME, parent.getFieldValue(uniqueKeyFieldName));
|
||||
}
|
||||
|
||||
private void setPathField(SolrInputDocument sdoc, String fullPath) {
|
||||
sdoc.setField(IndexSchema.NEST_PATH_FIELD_NAME, fullPath);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -565,6 +565,9 @@
|
|||
<field name="_version_" type="long" indexed="false" stored="false" docValues="true"/>
|
||||
<!-- points to the root document of a block of nested documents -->
|
||||
<field name="_root_" type="string" indexed="true" stored="true"/>
|
||||
<!-- populated by NestedUpdateProcessorFactory; the processor fills in whichever of these two fields the schema declares -->
|
||||
<field name="_NEST_PARENT_" type="string" indexed="true" stored="true"/>
|
||||
<field name="_NEST_PATH_" type="string" indexed="true" stored="true"/>
|
||||
|
||||
<field name="multi_int_with_docvals" type="tint" multiValued="true" docValues="true" indexed="false"/>
|
||||
|
||||
|
|
|
@ -30,6 +30,11 @@
|
|||
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
|
||||
<schemaFactory class="ClassicIndexSchemaFactory"/>
|
||||
|
||||
<updateRequestProcessorChain name="nested">
|
||||
<processor class="solr.NestedUpdateProcessorFactory" />
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<updateRequestProcessorChain name="comprehensive">
|
||||
<processor class="solr.FieldLengthUpdateProcessorFactory">
|
||||
<arr name="typeClass">
|
||||
|
|
|
@ -0,0 +1,195 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.update;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.update.processor.NestedUpdateProcessorFactory;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.ExpectedException;
|
||||
|
||||
public class TestNestedUpdateProcessor extends SolrTestCaseJ4 {
|
||||
|
||||
private static final char PATH_SEP_CHAR = '/';
|
||||
private static final char NUM_SEP_CHAR = '#';
|
||||
private static final String SINGLE_VAL_CHAR = "";
|
||||
private static final String grandChildId = "4";
|
||||
private static final String secondChildList = "anotherChildList";
|
||||
private static final String jDoc = "{\n" +
|
||||
" \"add\": {\n" +
|
||||
" \"doc\": {\n" +
|
||||
" \"id\": \"1\",\n" +
|
||||
" \"children\": [\n" +
|
||||
" {\n" +
|
||||
" \"id\": \"2\",\n" +
|
||||
" \"foo_s\": \"Yaz\"\n" +
|
||||
" \"grandChild\": \n" +
|
||||
" {\n" +
|
||||
" \"id\": \""+ grandChildId + "\",\n" +
|
||||
" \"foo_s\": \"Jazz\"\n" +
|
||||
" },\n" +
|
||||
" },\n" +
|
||||
" {\n" +
|
||||
" \"id\": \"3\",\n" +
|
||||
" \"foo_s\": \"Bar\"\n" +
|
||||
" }\n" +
|
||||
" ]\n" +
|
||||
secondChildList + ": [{\"id\": \"4\", \"last_s\": \"Smith\"}],\n" +
|
||||
" }\n" +
|
||||
" }\n" +
|
||||
"}";
|
||||
|
||||
private static final String errDoc = "{\n" +
|
||||
" \"add\": {\n" +
|
||||
" \"doc\": {\n" +
|
||||
" \"id\": \"1\",\n" +
|
||||
" \"children" + PATH_SEP_CHAR + "a\": [\n" +
|
||||
" {\n" +
|
||||
" \"id\": \"2\",\n" +
|
||||
" \"foo_s\": \"Yaz\"\n" +
|
||||
" \"grandChild\": \n" +
|
||||
" {\n" +
|
||||
" \"id\": \""+ grandChildId + "\",\n" +
|
||||
" \"foo_s\": \"Jazz\"\n" +
|
||||
" },\n" +
|
||||
" },\n" +
|
||||
" {\n" +
|
||||
" \"id\": \"3\",\n" +
|
||||
" \"foo_s\": \"Bar\"\n" +
|
||||
" }\n" +
|
||||
" ]\n" +
|
||||
" }\n" +
|
||||
" }\n" +
|
||||
"}";
|
||||
|
||||
@Rule
|
||||
public ExpectedException thrown = ExpectedException.none();
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-update-processor-chains.xml", "schema15.xml");
|
||||
}
|
||||
|
||||
@Before
|
||||
public void before() throws Exception {
|
||||
assertU(delQ("*:*"));
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeeplyNestedURPGrandChild() throws Exception {
|
||||
final String[] tests = {
|
||||
"/response/docs/[0]/id=='4'",
|
||||
"/response/docs/[0]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='children#0/grandChild#'"
|
||||
};
|
||||
indexSampleData(jDoc);
|
||||
|
||||
assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":*/grandChild#*",
|
||||
"fl","*",
|
||||
"sort","id desc",
|
||||
"wt","json"),
|
||||
tests);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeeplyNestedURPChildren() throws Exception {
|
||||
final String[] childrenTests = {
|
||||
"/response/docs/[0]/id=='2'",
|
||||
"/response/docs/[1]/id=='3'",
|
||||
"/response/docs/[0]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='children#0'",
|
||||
"/response/docs/[1]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='children#1'"
|
||||
};
|
||||
indexSampleData(jDoc);
|
||||
|
||||
assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":children#?",
|
||||
"fl","*",
|
||||
"sort","id asc",
|
||||
"wt","json"),
|
||||
childrenTests);
|
||||
|
||||
assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":anotherChildList#?",
|
||||
"fl","*",
|
||||
"sort","id asc",
|
||||
"wt","json"),
|
||||
"/response/docs/[0]/id=='4'",
|
||||
"/response/docs/[0]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='anotherChildList#0'");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeeplyNestedURPSanity() throws Exception {
|
||||
SolrInputDocument docHierarchy = sdoc("id", "1", "children", sdocs(sdoc("id", "2", "name_s", "Yaz"),
|
||||
sdoc("id", "3", "name_s", "Jazz", "grandChild", sdoc("id", "4", "name_s", "Gaz"))), "lonelyChild", sdoc("id", "5", "name_s", "Loner"));
|
||||
UpdateRequestProcessor nestedUpdate = new NestedUpdateProcessorFactory().getInstance(req(), null, null);
|
||||
AddUpdateCommand cmd = new AddUpdateCommand(req());
|
||||
cmd.solrDoc = docHierarchy;
|
||||
nestedUpdate.processAdd(cmd);
|
||||
cmd.clear();
|
||||
|
||||
List children = (List) docHierarchy.get("children").getValues();
|
||||
|
||||
SolrInputDocument firstChild = (SolrInputDocument) children.get(0);
|
||||
assertEquals("SolrInputDocument(fields: [id=2, name_s=Yaz, _NEST_PATH_=children#0, _NEST_PARENT_=1])", firstChild.toString());
|
||||
|
||||
SolrInputDocument secondChild = (SolrInputDocument) children.get(1);
|
||||
assertEquals("SolrInputDocument(fields: [id=3, name_s=Jazz, grandChild=SolrInputDocument(fields: [id=4, name_s=Gaz, _NEST_PATH_=children#1/grandChild#, _NEST_PARENT_=3]), _NEST_PATH_=children#1, _NEST_PARENT_=1])", secondChild.toString());
|
||||
|
||||
SolrInputDocument grandChild = (SolrInputDocument)((SolrInputDocument) children.get(1)).get("grandChild").getValue();
|
||||
assertEquals("SolrInputDocument(fields: [id=4, name_s=Gaz, _NEST_PATH_=children#1/grandChild#, _NEST_PARENT_=3])", grandChild.toString());
|
||||
|
||||
SolrInputDocument singularChild = (SolrInputDocument) docHierarchy.get("lonelyChild").getValue();
|
||||
assertEquals("SolrInputDocument(fields: [id=5, name_s=Loner, _NEST_PATH_=lonelyChild#, _NEST_PARENT_=1])", singularChild.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeeplyNestedURPChildrenWoId() throws Exception {
|
||||
final String rootId = "1";
|
||||
final String childKey = "grandChild";
|
||||
final String expectedId = rootId + "/children#1/" + childKey + NUM_SEP_CHAR + SINGLE_VAL_CHAR;
|
||||
SolrInputDocument noIdChildren = sdoc("id", rootId, "children", sdocs(sdoc("name_s", "Yaz"), sdoc("name_s", "Jazz", childKey, sdoc("name_s", "Gaz"))));
|
||||
UpdateRequestProcessor nestedUpdate = new NestedUpdateProcessorFactory().getInstance(req(), null, null);
|
||||
AddUpdateCommand cmd = new AddUpdateCommand(req());
|
||||
cmd.solrDoc = noIdChildren;
|
||||
nestedUpdate.processAdd(cmd);
|
||||
cmd.clear();
|
||||
List children = (List) noIdChildren.get("children").getValues();
|
||||
SolrInputDocument idLessChild = (SolrInputDocument)((SolrInputDocument) children.get(1)).get(childKey).getValue();
|
||||
assertTrue("Id less child did not get an Id", idLessChild.containsKey("id"));
|
||||
assertEquals("Id less child was assigned an unexpected id", expectedId, idLessChild.getFieldValue("id").toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeeplyNestedURPFieldNameException() throws Exception {
|
||||
final String errMsg = "contains: '" + PATH_SEP_CHAR + "' , which is reserved for the nested URP";
|
||||
thrown.expect(SolrException.class);
|
||||
indexSampleData(errDoc);
|
||||
thrown.expectMessage(errMsg);
|
||||
}
|
||||
|
||||
private void indexSampleData(String cmd) throws Exception {
|
||||
updateJ(cmd, params("update.chain", "nested"));
|
||||
assertU(commit());
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue