diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index e0a3b431a6e..cfa27b84364 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -103,6 +103,9 @@ New Features * SOLR-12495: An #EQUALS function for replica in autoscaling policy to equally distribute replicas (noble) +* SOLR-12441: New NestedUpdateProcessorFactory (URP) to populate special fields _NEST_PARENT_ and _NEST_PATH_ of nested + (child) documents. It will generate a uniqueKey of nested docs if they were blank too. (Moshe Bla, David Smiley) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java index 7f86f27afff..2cb26c71782 100644 --- a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java +++ b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java @@ -107,6 +107,8 @@ public class IndexSchema { public static final String LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion"; public static final String MAX_CHARS = "maxChars"; public static final String NAME = "name"; + public static final String NEST_PARENT_FIELD_NAME = "_NEST_PARENT_"; + public static final String NEST_PATH_FIELD_NAME = "_NEST_PATH_"; public static final String REQUIRED = "required"; public static final String SCHEMA = "schema"; public static final String SIMILARITY = "similarity"; diff --git a/solr/core/src/java/org/apache/solr/update/processor/NestedUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/NestedUpdateProcessorFactory.java new file mode 100644 index 00000000000..aa459bd7094 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/update/processor/NestedUpdateProcessorFactory.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update.processor; + +import java.io.IOException; +import java.util.Collection; + +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.update.AddUpdateCommand; + +/** + * Adds fields to nested documents to support some nested search requirements. + * It can even generate uniqueKey fields for nested docs. + * + * @see IndexSchema#NEST_PARENT_FIELD_NAME + * @see IndexSchema#NEST_PATH_FIELD_NAME + * + * @since 7.5.0 + */ +public class NestedUpdateProcessorFactory extends UpdateRequestProcessorFactory { + + public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next ) { + boolean storeParent = shouldStoreDocParent(req.getSchema()); + boolean storePath = shouldStoreDocPath(req.getSchema()); + if(!(storeParent || storePath)) { + return next; + } + return new NestedUpdateProcessor(req, shouldStoreDocParent(req.getSchema()), shouldStoreDocPath(req.getSchema()), next); + } + + private static boolean shouldStoreDocParent(IndexSchema schema) { + return schema.getFields().containsKey(IndexSchema.NEST_PARENT_FIELD_NAME); + } + + private static boolean shouldStoreDocPath(IndexSchema schema) { + return schema.getFields().containsKey(IndexSchema.NEST_PATH_FIELD_NAME); + } + + private static class NestedUpdateProcessor extends UpdateRequestProcessor { + private static final String PATH_SEP_CHAR = "/"; + private static final String NUM_SEP_CHAR = "#"; + private static final String SINGULAR_VALUE_CHAR = ""; + private boolean storePath; + private boolean storeParent; + private String uniqueKeyFieldName; + + + NestedUpdateProcessor(SolrQueryRequest req, boolean storeParent, boolean storePath, UpdateRequestProcessor next) { + super(next); + this.storeParent = storeParent; + this.storePath = storePath; + this.uniqueKeyFieldName = req.getSchema().getUniqueKeyField().getName(); + } + + @Override + public void processAdd(AddUpdateCommand cmd) throws IOException { + SolrInputDocument doc = cmd.getSolrInputDocument(); + processDocChildren(doc, null); + super.processAdd(cmd); + } + + private void processDocChildren(SolrInputDocument doc, String fullPath) { + for(SolrInputField field: doc.values()) { + int childNum = 0; + boolean isSingleVal = !(field.getValue() instanceof Collection); + for(Object val: field) { + if(!(val instanceof SolrInputDocument)) { + // either all collection items are child docs or none are. + break; + } + final String fieldName = field.getName(); + + if(fieldName.contains(PATH_SEP_CHAR)) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field name: '" + fieldName + + "' contains: '" + PATH_SEP_CHAR + "' , which is reserved for the nested URP"); + } + final String sChildNum = isSingleVal ? SINGULAR_VALUE_CHAR : String.valueOf(childNum); + SolrInputDocument cDoc = (SolrInputDocument) val; + if(!cDoc.containsKey(uniqueKeyFieldName)) { + String parentDocId = doc.getField(uniqueKeyFieldName).getFirstValue().toString(); + cDoc.setField(uniqueKeyFieldName, generateChildUniqueId(parentDocId, fieldName, sChildNum)); + } + final String lastKeyPath = fieldName + NUM_SEP_CHAR + sChildNum; + // concat of all paths children.grandChild => children#1/grandChild# + final String childDocPath = fullPath == null ? lastKeyPath : fullPath + PATH_SEP_CHAR + lastKeyPath; + processChildDoc((SolrInputDocument) val, doc, childDocPath); + ++childNum; + } + } + } + + private void processChildDoc(SolrInputDocument sdoc, SolrInputDocument parent, String fullPath) { + if(storePath) { + setPathField(sdoc, fullPath); + } + if (storeParent) { + setParentKey(sdoc, parent); + } + processDocChildren(sdoc, fullPath); + } + + private String generateChildUniqueId(String parentId, String childKey, String childNum) { + // combines parentId with the child's key and childNum. e.g. "10/footnote#1" + return parentId + PATH_SEP_CHAR + childKey + NUM_SEP_CHAR + childNum; + } + + private void setParentKey(SolrInputDocument sdoc, SolrInputDocument parent) { + sdoc.setField(IndexSchema.NEST_PARENT_FIELD_NAME, parent.getFieldValue(uniqueKeyFieldName)); + } + + private void setPathField(SolrInputDocument sdoc, String fullPath) { + sdoc.setField(IndexSchema.NEST_PATH_FIELD_NAME, fullPath); + } + + } + +} diff --git a/solr/core/src/test-files/solr/collection1/conf/schema15.xml b/solr/core/src/test-files/solr/collection1/conf/schema15.xml index 80d19e9b2a8..5ca95292d06 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema15.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema15.xml @@ -565,6 +565,9 @@ + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml index 4113bd133eb..f22354e4ced 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-update-processor-chains.xml @@ -30,6 +30,11 @@ + + + + + diff --git a/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java b/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java new file mode 100644 index 00000000000..ab36f0e1271 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.update; + +import java.util.List; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.update.processor.NestedUpdateProcessorFactory; +import org.apache.solr.update.processor.UpdateRequestProcessor; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +public class TestNestedUpdateProcessor extends SolrTestCaseJ4 { + + private static final char PATH_SEP_CHAR = '/'; + private static final char NUM_SEP_CHAR = '#'; + private static final String SINGLE_VAL_CHAR = ""; + private static final String grandChildId = "4"; + private static final String secondChildList = "anotherChildList"; + private static final String jDoc = "{\n" + + " \"add\": {\n" + + " \"doc\": {\n" + + " \"id\": \"1\",\n" + + " \"children\": [\n" + + " {\n" + + " \"id\": \"2\",\n" + + " \"foo_s\": \"Yaz\"\n" + + " \"grandChild\": \n" + + " {\n" + + " \"id\": \""+ grandChildId + "\",\n" + + " \"foo_s\": \"Jazz\"\n" + + " },\n" + + " },\n" + + " {\n" + + " \"id\": \"3\",\n" + + " \"foo_s\": \"Bar\"\n" + + " }\n" + + " ]\n" + + secondChildList + ": [{\"id\": \"4\", \"last_s\": \"Smith\"}],\n" + + " }\n" + + " }\n" + + "}"; + + private static final String errDoc = "{\n" + + " \"add\": {\n" + + " \"doc\": {\n" + + " \"id\": \"1\",\n" + + " \"children" + PATH_SEP_CHAR + "a\": [\n" + + " {\n" + + " \"id\": \"2\",\n" + + " \"foo_s\": \"Yaz\"\n" + + " \"grandChild\": \n" + + " {\n" + + " \"id\": \""+ grandChildId + "\",\n" + + " \"foo_s\": \"Jazz\"\n" + + " },\n" + + " },\n" + + " {\n" + + " \"id\": \"3\",\n" + + " \"foo_s\": \"Bar\"\n" + + " }\n" + + " ]\n" + + " }\n" + + " }\n" + + "}"; + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-update-processor-chains.xml", "schema15.xml"); + } + + @Before + public void before() throws Exception { + assertU(delQ("*:*")); + assertU(commit()); + } + + @Test + public void testDeeplyNestedURPGrandChild() throws Exception { + final String[] tests = { + "/response/docs/[0]/id=='4'", + "/response/docs/[0]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='children#0/grandChild#'" + }; + indexSampleData(jDoc); + + assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":*/grandChild#*", + "fl","*", + "sort","id desc", + "wt","json"), + tests); + } + + @Test + public void testDeeplyNestedURPChildren() throws Exception { + final String[] childrenTests = { + "/response/docs/[0]/id=='2'", + "/response/docs/[1]/id=='3'", + "/response/docs/[0]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='children#0'", + "/response/docs/[1]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='children#1'" + }; + indexSampleData(jDoc); + + assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":children#?", + "fl","*", + "sort","id asc", + "wt","json"), + childrenTests); + + assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":anotherChildList#?", + "fl","*", + "sort","id asc", + "wt","json"), + "/response/docs/[0]/id=='4'", + "/response/docs/[0]/" + IndexSchema.NEST_PATH_FIELD_NAME + "=='anotherChildList#0'"); + } + + @Test + public void testDeeplyNestedURPSanity() throws Exception { + SolrInputDocument docHierarchy = sdoc("id", "1", "children", sdocs(sdoc("id", "2", "name_s", "Yaz"), + sdoc("id", "3", "name_s", "Jazz", "grandChild", sdoc("id", "4", "name_s", "Gaz"))), "lonelyChild", sdoc("id", "5", "name_s", "Loner")); + UpdateRequestProcessor nestedUpdate = new NestedUpdateProcessorFactory().getInstance(req(), null, null); + AddUpdateCommand cmd = new AddUpdateCommand(req()); + cmd.solrDoc = docHierarchy; + nestedUpdate.processAdd(cmd); + cmd.clear(); + + List children = (List) docHierarchy.get("children").getValues(); + + SolrInputDocument firstChild = (SolrInputDocument) children.get(0); + assertEquals("SolrInputDocument(fields: [id=2, name_s=Yaz, _NEST_PATH_=children#0, _NEST_PARENT_=1])", firstChild.toString()); + + SolrInputDocument secondChild = (SolrInputDocument) children.get(1); + assertEquals("SolrInputDocument(fields: [id=3, name_s=Jazz, grandChild=SolrInputDocument(fields: [id=4, name_s=Gaz, _NEST_PATH_=children#1/grandChild#, _NEST_PARENT_=3]), _NEST_PATH_=children#1, _NEST_PARENT_=1])", secondChild.toString()); + + SolrInputDocument grandChild = (SolrInputDocument)((SolrInputDocument) children.get(1)).get("grandChild").getValue(); + assertEquals("SolrInputDocument(fields: [id=4, name_s=Gaz, _NEST_PATH_=children#1/grandChild#, _NEST_PARENT_=3])", grandChild.toString()); + + SolrInputDocument singularChild = (SolrInputDocument) docHierarchy.get("lonelyChild").getValue(); + assertEquals("SolrInputDocument(fields: [id=5, name_s=Loner, _NEST_PATH_=lonelyChild#, _NEST_PARENT_=1])", singularChild.toString()); + } + + @Test + public void testDeeplyNestedURPChildrenWoId() throws Exception { + final String rootId = "1"; + final String childKey = "grandChild"; + final String expectedId = rootId + "/children#1/" + childKey + NUM_SEP_CHAR + SINGLE_VAL_CHAR; + SolrInputDocument noIdChildren = sdoc("id", rootId, "children", sdocs(sdoc("name_s", "Yaz"), sdoc("name_s", "Jazz", childKey, sdoc("name_s", "Gaz")))); + UpdateRequestProcessor nestedUpdate = new NestedUpdateProcessorFactory().getInstance(req(), null, null); + AddUpdateCommand cmd = new AddUpdateCommand(req()); + cmd.solrDoc = noIdChildren; + nestedUpdate.processAdd(cmd); + cmd.clear(); + List children = (List) noIdChildren.get("children").getValues(); + SolrInputDocument idLessChild = (SolrInputDocument)((SolrInputDocument) children.get(1)).get(childKey).getValue(); + assertTrue("Id less child did not get an Id", idLessChild.containsKey("id")); + assertEquals("Id less child was assigned an unexpected id", expectedId, idLessChild.getFieldValue("id").toString()); + } + + @Test + public void testDeeplyNestedURPFieldNameException() throws Exception { + final String errMsg = "contains: '" + PATH_SEP_CHAR + "' , which is reserved for the nested URP"; + thrown.expect(SolrException.class); + indexSampleData(errDoc); + thrown.expectMessage(errMsg); + } + + private void indexSampleData(String cmd) throws Exception { + updateJ(cmd, params("update.chain", "nested")); + assertU(commit()); + } +}