SOLR-12441: New NestedUpdateProcessorFactory

This commit is contained in:
David Smiley 2018-07-10 22:59:41 -04:00
parent 14e132b29f
commit fe180bbd59
6 changed files with 345 additions and 0 deletions

View File

@ -103,6 +103,9 @@ New Features
* SOLR-12495: An #EQUALS function for replica in autoscaling policy to equally distribute replicas (noble)
* SOLR-12441: New NestedUpdateProcessorFactory (URP) to populate special fields _NEST_PARENT_ and _NEST_PATH_ of nested
(child) documents. It will also generate a uniqueKey for nested docs that lack one. (Moshe Bla, David Smiley)
Bug Fixes
----------------------

View File

@ -107,6 +107,8 @@ public class IndexSchema {
public static final String LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";
public static final String MAX_CHARS = "maxChars";
public static final String NAME = "name";
public static final String NEST_PARENT_FIELD_NAME = "_NEST_PARENT_";
public static final String NEST_PATH_FIELD_NAME = "_NEST_PATH_";
public static final String REQUIRED = "required";
public static final String SCHEMA = "schema";
public static final String SIMILARITY = "similarity";

View File

@ -0,0 +1,137 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;
import java.io.IOException;
import java.util.Collection;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.update.AddUpdateCommand;
/**
 * Adds fields to nested documents to support some nested search requirements.
 * It can even generate uniqueKey fields for nested docs.
 *
 * <p>A nested (child) document is any {@link SolrInputDocument} found as a value of a field of
 * another document. For each child the processor can populate:
 * <ul>
 *   <li>{@link IndexSchema#NEST_PATH_FIELD_NAME}: the path from the root document to the child,
 *       built from {@code fieldName#index} segments joined with {@code /}. The index is left
 *       empty when the field holds a single (non-collection) value,
 *       e.g. {@code children#1/grandChild#}.</li>
 *   <li>{@link IndexSchema#NEST_PARENT_FIELD_NAME}: the uniqueKey value of the direct parent.</li>
 * </ul>
 * Each field is only populated when the schema defines it; when the schema defines neither,
 * the factory adds no processor to the chain.
 *
 * @see IndexSchema#NEST_PARENT_FIELD_NAME
 * @see IndexSchema#NEST_PATH_FIELD_NAME
 *
 * @since 7.5.0
 */
public class NestedUpdateProcessorFactory extends UpdateRequestProcessorFactory {

  /**
   * Creates the processor for a request.
   *
   * @param req  the current request; its schema decides which nested fields are populated
   * @param rsp  the current response (not used here)
   * @param next the next processor in the chain
   * @return a new nested-document processor wrapping {@code next}, or {@code next} itself when
   *         the schema defines neither of the nested fields
   */
  @Override
  public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
    final IndexSchema schema = req.getSchema();
    final boolean storeParent = shouldStoreDocParent(schema);
    final boolean storePath = shouldStoreDocPath(schema);
    if (!(storeParent || storePath)) {
      // Nothing to populate: skip this processor entirely.
      return next;
    }
    // Reuse the flags computed above instead of querying the schema a second time.
    return new NestedUpdateProcessor(req, storeParent, storePath, next);
  }

  /** Returns true when the schema defines the {@code _NEST_PARENT_} field. */
  private static boolean shouldStoreDocParent(IndexSchema schema) {
    return schema.getFields().containsKey(IndexSchema.NEST_PARENT_FIELD_NAME);
  }

  /** Returns true when the schema defines the {@code _NEST_PATH_} field. */
  private static boolean shouldStoreDocPath(IndexSchema schema) {
    return schema.getFields().containsKey(IndexSchema.NEST_PATH_FIELD_NAME);
  }

  /**
   * Walks the child documents of every added document, depth first, filling in the nested
   * fields and any missing child uniqueKey before handing the command to the next processor.
   */
  private static class NestedUpdateProcessor extends UpdateRequestProcessor {
    /** Separates the segments of a nested path; field names holding child docs must not contain it. */
    private static final String PATH_SEP_CHAR = "/";
    /** Separates a field name from the child's index within that field. */
    private static final String NUM_SEP_CHAR = "#";
    /** Index used for a child that is the single (non-collection) value of its field. */
    private static final String SINGULAR_VALUE_CHAR = "";

    private final boolean storePath;
    private final boolean storeParent;
    private final String uniqueKeyFieldName;

    // NOTE(review): getUniqueKeyField() is dereferenced unchecked, so this presumably assumes the
    // schema always declares a uniqueKey field - confirm.
    NestedUpdateProcessor(SolrQueryRequest req, boolean storeParent, boolean storePath, UpdateRequestProcessor next) {
      super(next);
      this.storeParent = storeParent;
      this.storePath = storePath;
      this.uniqueKeyFieldName = req.getSchema().getUniqueKeyField().getName();
    }

    /**
     * Decorates all nested documents of the added document, then delegates to the rest of the chain.
     *
     * @throws SolrException (BAD_REQUEST) if a field holding child documents contains the path
     *         separator in its name, or if an id must be generated for a child whose parent has
     *         no uniqueKey value
     */
    @Override
    public void processAdd(AddUpdateCommand cmd) throws IOException {
      SolrInputDocument doc = cmd.getSolrInputDocument();
      processDocChildren(doc, null);
      super.processAdd(cmd);
    }

    /**
     * Processes every child document stored directly in {@code doc}.
     *
     * @param doc      the parent document whose fields are scanned
     * @param fullPath the nested path of {@code doc} itself, or null when {@code doc} is the root
     */
    private void processDocChildren(SolrInputDocument doc, String fullPath) {
      for (SolrInputField field : doc.values()) {
        final String fieldName = field.getName();
        final boolean isSingleVal = !(field.getValue() instanceof Collection);
        int childNum = 0;
        for (Object val : field) {
          if (!(val instanceof SolrInputDocument)) {
            // either all collection items are child docs or none are.
            break;
          }
          if (fieldName.contains(PATH_SEP_CHAR)) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field name: '" + fieldName
                + "' contains: '" + PATH_SEP_CHAR + "' , which is reserved for the nested URP");
          }
          final SolrInputDocument cDoc = (SolrInputDocument) val;
          final String sChildNum = isSingleVal ? SINGULAR_VALUE_CHAR : String.valueOf(childNum);
          if (!cDoc.containsKey(uniqueKeyFieldName)) {
            // The id is assigned before recursing, so ids generated for deeper descendants are
            // based on this generated id.
            final String parentDocId = requireParentId(doc, fieldName);
            cDoc.setField(uniqueKeyFieldName, generateChildUniqueId(parentDocId, fieldName, sChildNum));
          }
          final String lastKeyPath = fieldName + NUM_SEP_CHAR + sChildNum;
          // concat of all paths children.grandChild => children#1/grandChild#
          final String childDocPath = fullPath == null ? lastKeyPath : fullPath + PATH_SEP_CHAR + lastKeyPath;
          processChildDoc(cDoc, doc, childDocPath);
          ++childNum;
        }
      }
    }

    /**
     * Returns the parent's uniqueKey value as a string.
     *
     * @throws SolrException (BAD_REQUEST) if the parent has no uniqueKey value, in which case no
     *         id can be derived for its child
     */
    private String requireParentId(SolrInputDocument parent, String childFieldName) {
      final Object parentId = parent.getFieldValue(uniqueKeyFieldName);
      if (parentId == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to generate '" + uniqueKeyFieldName
            + "' for the child document in field '" + childFieldName
            + "' because its parent document has no '" + uniqueKeyFieldName + "' value");
      }
      return parentId.toString();
    }

    /** Sets the nested fields enabled by the schema on {@code sdoc}, then descends into its own children. */
    private void processChildDoc(SolrInputDocument sdoc, SolrInputDocument parent, String fullPath) {
      if (storePath) {
        setPathField(sdoc, fullPath);
      }
      if (storeParent) {
        setParentKey(sdoc, parent);
      }
      processDocChildren(sdoc, fullPath);
    }

    private String generateChildUniqueId(String parentId, String childKey, String childNum) {
      // combines parentId with the child's key and childNum. e.g. "10/footnote#1"
      return parentId + PATH_SEP_CHAR + childKey + NUM_SEP_CHAR + childNum;
    }

    /** Stores the parent's uniqueKey value in the child's {@code _NEST_PARENT_} field. */
    private void setParentKey(SolrInputDocument sdoc, SolrInputDocument parent) {
      sdoc.setField(IndexSchema.NEST_PARENT_FIELD_NAME, parent.getFieldValue(uniqueKeyFieldName));
    }

    /** Stores the child's nested path in its {@code _NEST_PATH_} field. */
    private void setPathField(SolrInputDocument sdoc, String fullPath) {
      sdoc.setField(IndexSchema.NEST_PATH_FIELD_NAME, fullPath);
    }
  }
}

View File

@ -565,6 +565,9 @@
<field name="_version_" type="long" indexed="false" stored="false" docValues="true"/>
<!-- points to the root document of a block of nested documents -->
<field name="_root_" type="string" indexed="true" stored="true"/>
<!-- populated by NestedUpdateProcessor; the processor is skipped when neither field is defined -->
<field name="_NEST_PARENT_" type="string" indexed="true" stored="true"/>
<field name="_NEST_PATH_" type="string" indexed="true" stored="true"/>
<field name="multi_int_with_docvals" type="tint" multiValued="true" docValues="true" indexed="false"/>

View File

@ -30,6 +30,11 @@
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
<schemaFactory class="ClassicIndexSchemaFactory"/>
<!-- Update chain selected with update.chain=nested: applies NestedUpdateProcessorFactory, then RunUpdateProcessorFactory -->
<updateRequestProcessorChain name="nested">
<processor class="solr.NestedUpdateProcessorFactory" />
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="comprehensive">
<processor class="solr.FieldLengthUpdateProcessorFactory">
<arr name="typeClass">

View File

@ -0,0 +1,195 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update;
import java.util.List;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.update.processor.NestedUpdateProcessorFactory;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
/**
 * Tests for {@link NestedUpdateProcessorFactory}: population of the nested path and parent
 * fields, generation of ids for id-less children, and rejection of child field names that
 * contain the path separator.
 */
public class TestNestedUpdateProcessor extends SolrTestCaseJ4 {
  private static final char PATH_SEP_CHAR = '/';
  private static final char NUM_SEP_CHAR = '#';
  private static final String SINGLE_VAL_CHAR = "";
  private static final String grandChildId = "4";
  private static final String secondChildList = "anotherChildList";

  // Root doc "1" with a "children" list (docs "2" and "3"), a single "grandChild" under doc "2",
  // and a second child list.
  // NOTE(review): this is not strict JSON (missing commas, an unquoted key, trailing commas);
  // presumably the update handler's JSON parser is lenient enough to accept it - confirm.
  private static final String jDoc = "{\n" +
      " \"add\": {\n" +
      " \"doc\": {\n" +
      " \"id\": \"1\",\n" +
      " \"children\": [\n" +
      " {\n" +
      " \"id\": \"2\",\n" +
      " \"foo_s\": \"Yaz\"\n" +
      " \"grandChild\": \n" +
      " {\n" +
      " \"id\": \""+ grandChildId + "\",\n" +
      " \"foo_s\": \"Jazz\"\n" +
      " },\n" +
      " },\n" +
      " {\n" +
      " \"id\": \"3\",\n" +
      " \"foo_s\": \"Bar\"\n" +
      " }\n" +
      " ]\n" +
      secondChildList + ": [{\"id\": \"4\", \"last_s\": \"Smith\"}],\n" +
      " }\n" +
      " }\n" +
      "}";

  // Same shape as jDoc, but the child field name contains PATH_SEP_CHAR, which the processor rejects.
  private static final String errDoc = "{\n" +
      " \"add\": {\n" +
      " \"doc\": {\n" +
      " \"id\": \"1\",\n" +
      " \"children" + PATH_SEP_CHAR + "a\": [\n" +
      " {\n" +
      " \"id\": \"2\",\n" +
      " \"foo_s\": \"Yaz\"\n" +
      " \"grandChild\": \n" +
      " {\n" +
      " \"id\": \""+ grandChildId + "\",\n" +
      " \"foo_s\": \"Jazz\"\n" +
      " },\n" +
      " },\n" +
      " {\n" +
      " \"id\": \"3\",\n" +
      " \"foo_s\": \"Bar\"\n" +
      " }\n" +
      " ]\n" +
      " }\n" +
      " }\n" +
      "}";

  @Rule
  public ExpectedException thrown = ExpectedException.none();

  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-update-processor-chains.xml", "schema15.xml");
  }

  /** Starts every test from an empty index. */
  @Before
  public void before() throws Exception {
    assertU(delQ("*:*"));
    assertU(commit());
  }

  /** A single-valued grandchild gets a path made of its parent's segment plus an index-less segment. */
  @Test
  public void testDeeplyNestedURPGrandChild() throws Exception {
    final String[] tests = {
        "/response/docs/[0]/id=='4'",
        "/response/docs/[0]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='children#0/grandChild#'"
    };
    indexSampleData(jDoc);

    assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":*/grandChild#*",
        "fl","*",
        "sort","id desc",
        "wt","json"),
        tests);
  }

  /** Children held in a list get a path of the field name followed by their index in the list. */
  @Test
  public void testDeeplyNestedURPChildren() throws Exception {
    final String[] childrenTests = {
        "/response/docs/[0]/id=='2'",
        "/response/docs/[1]/id=='3'",
        "/response/docs/[0]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='children#0'",
        "/response/docs/[1]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='children#1'"
    };
    indexSampleData(jDoc);

    assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":children#?",
        "fl","*",
        "sort","id asc",
        "wt","json"),
        childrenTests);

    assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":anotherChildList#?",
        "fl","*",
        "sort","id asc",
        "wt","json"),
        "/response/docs/[0]/id=='4'",
        "/response/docs/[0]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='anotherChildList#0'");
  }

  /** Runs the processor directly (no indexing) and checks the fields added to each nested document. */
  @Test
  public void testDeeplyNestedURPSanity() throws Exception {
    SolrInputDocument docHierarchy = sdoc("id", "1", "children", sdocs(sdoc("id", "2", "name_s", "Yaz"),
        sdoc("id", "3", "name_s", "Jazz", "grandChild", sdoc("id", "4", "name_s", "Gaz"))), "lonelyChild", sdoc("id", "5", "name_s", "Loner"));
    UpdateRequestProcessor nestedUpdate = new NestedUpdateProcessorFactory().getInstance(req(), null, null);
    AddUpdateCommand cmd = new AddUpdateCommand(req());
    cmd.solrDoc = docHierarchy;
    nestedUpdate.processAdd(cmd);
    cmd.clear();

    List<?> children = (List<?>) docHierarchy.get("children").getValues();

    SolrInputDocument firstChild = (SolrInputDocument) children.get(0);
    assertEquals("SolrInputDocument(fields: [id=2, name_s=Yaz, _NEST_PATH_=children#0, _NEST_PARENT_=1])", firstChild.toString());

    SolrInputDocument secondChild = (SolrInputDocument) children.get(1);
    assertEquals("SolrInputDocument(fields: [id=3, name_s=Jazz, grandChild=SolrInputDocument(fields: [id=4, name_s=Gaz, _NEST_PATH_=children#1/grandChild#, _NEST_PARENT_=3]), _NEST_PATH_=children#1, _NEST_PARENT_=1])", secondChild.toString());

    SolrInputDocument grandChild = (SolrInputDocument)((SolrInputDocument) children.get(1)).get("grandChild").getValue();
    assertEquals("SolrInputDocument(fields: [id=4, name_s=Gaz, _NEST_PATH_=children#1/grandChild#, _NEST_PARENT_=3])", grandChild.toString());

    SolrInputDocument singularChild = (SolrInputDocument) docHierarchy.get("lonelyChild").getValue();
    assertEquals("SolrInputDocument(fields: [id=5, name_s=Loner, _NEST_PATH_=lonelyChild#, _NEST_PARENT_=1])", singularChild.toString());
  }

  /** Children without an id receive one derived from the (possibly generated) id of their parent. */
  @Test
  public void testDeeplyNestedURPChildrenWoId() throws Exception {
    final String rootId = "1";
    final String childKey = "grandChild";
    final String expectedId = rootId + "/children#1/" + childKey + NUM_SEP_CHAR + SINGLE_VAL_CHAR;
    SolrInputDocument noIdChildren = sdoc("id", rootId, "children", sdocs(sdoc("name_s", "Yaz"), sdoc("name_s", "Jazz", childKey, sdoc("name_s", "Gaz"))));
    UpdateRequestProcessor nestedUpdate = new NestedUpdateProcessorFactory().getInstance(req(), null, null);
    AddUpdateCommand cmd = new AddUpdateCommand(req());
    cmd.solrDoc = noIdChildren;
    nestedUpdate.processAdd(cmd);
    cmd.clear();

    List<?> children = (List<?>) noIdChildren.get("children").getValues();
    SolrInputDocument idLessChild = (SolrInputDocument)((SolrInputDocument) children.get(1)).get(childKey).getValue();
    assertTrue("Id less child did not get an Id", idLessChild.containsKey("id"));
    assertEquals("Id less child was assigned an unexpected id", expectedId, idLessChild.getFieldValue("id").toString());
  }

  /** A child field name containing the path separator must be rejected with a descriptive error. */
  @Test
  public void testDeeplyNestedURPFieldNameException() throws Exception {
    final String errMsg = "contains: '" + PATH_SEP_CHAR + "' , which is reserved for the nested URP";
    // Both expectations must be registered before the call that throws; anything placed after
    // that call is never executed, so the message would go unchecked.
    thrown.expect(SolrException.class);
    thrown.expectMessage(errMsg);
    indexSampleData(errDoc);
  }

  /** Sends the JSON update command through the "nested" update chain and commits. */
  private void indexSampleData(String cmd) throws Exception {
    updateJ(cmd, params("update.chain", "nested"));
    assertU(commit());
  }
}