mirror of https://github.com/apache/lucene.git
SOLR-14383: Fix nested indexing ref-guide documentation and corrisponding search examples
Squashed commit of branch jira/SOLR-14383 as of... commit34da84c238
Merge:5d293276a3
8bd79ec301
Author: Chris Hostetter <hossman@apache.org> Date: Tue Sep 1 16:39:03 2020 -0700 Merge branch 'master' into jira/SOLR-14383
This commit is contained in:
parent
b867ae49d3
commit
a70a47d053
|
@ -560,6 +560,8 @@
|
|||
<field name="_version_" type="long" indexed="false" stored="false" docValues="true"/>
|
||||
<!-- points to the root document of a block of nested documents -->
|
||||
<field name="_root_" type="string" indexed="true" stored="true"/>
|
||||
<!-- for nested documents (relationship tracking) -->
|
||||
<field name="_nest_path_" type="_nest_path_" /><fieldType name="_nest_path_" class="solr.NestPathField" />
|
||||
|
||||
<field name="multi_int_with_docvals" type="tint" multiValued="true" docValues="true" indexed="false"/>
|
||||
|
||||
|
|
|
@ -523,10 +523,8 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
|
|||
assertQueryEquals(null, "{!parent which='+*:* -foo_s:parent'}",
|
||||
"{!child of=foo_s:parent}");
|
||||
|
||||
final SolrQueryRequest req = req(
|
||||
"fq","bar_s:baz","fq","{!tag=fqban}bar_s:ban",
|
||||
"ffq","bar_s:baz","ffq","{!tag=ffqban}bar_s:ban");
|
||||
try {
|
||||
try (SolrQueryRequest req = req("fq","bar_s:baz","fq","{!tag=fqban}bar_s:ban",
|
||||
"ffq","bar_s:baz","ffq","{!tag=ffqban}bar_s:ban")) {
|
||||
assertQueryEquals("filters", req,
|
||||
"{!parent which=foo_s:parent param=$fq}foo_s:bar",
|
||||
"{!parent which=foo_s:parent param=$ffq}foo_s:bar" // differently named params
|
||||
|
@ -539,9 +537,42 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
|
|||
QueryUtils.checkUnequal(// parent filter is not an equal to child
|
||||
QParser.getParser("{!child of=foo_s:parent}", req).getQuery(),
|
||||
QParser.getParser("{!parent which=foo_s:parent}", req).getQuery());
|
||||
}
|
||||
|
||||
// sanity check multiple ways of specifying _nest_path_ prefixes
|
||||
final String parent_path = "/aa/bb";
|
||||
try (SolrQueryRequest req = req("parent_filt", "(*:* -{!prefix f='_nest_path_' v='"+parent_path+"/'})",
|
||||
"child_q", "(+foo +{!prefix f='_nest_path_' v='"+parent_path+"/'})",
|
||||
"parent_q", "(+bar +{!field f='_nest_path_' v='"+parent_path+"'})")) {
|
||||
|
||||
assertQueryEquals("parent", req,
|
||||
|
||||
// using local params to refer to other query params using 'prefix' parser...
|
||||
"{!parent which=$parent_filt v=$child_q}",
|
||||
|
||||
// using 'inline' prefix query syntax...
|
||||
//
|
||||
// '/' has to be escaped otherwise it will be treated as a regex query...
|
||||
// ...and when used inside the 'which' param it has to be escaped *AGAIN* because of
|
||||
// the "quoted" localparam evaluation layer...
|
||||
// (and of course '\' escaping is the java syntax as well, we have to double it)
|
||||
"{!parent which='*:* -_nest_path_:"+(parent_path + "/").replace("/","\\\\/") +"*'}"
|
||||
+ "(+foo +_nest_path_:" + (parent_path + "/").replace("/", "\\/") + "*)");
|
||||
|
||||
assertQueryEquals("child", req,
|
||||
|
||||
// using local params to refer to other query params using 'prefix' parser...
|
||||
"{!child of=$parent_filt v=$parent_q}",
|
||||
|
||||
// using 'inline' prefix query syntax...
|
||||
//
|
||||
// '/' has to be escaped otherwise it will be treated as a regex query...
|
||||
// ...and when used inside the 'of' param it has to be escaped *AGAIN* because of
|
||||
// the "quoted" localparam evaluation layer...
|
||||
// (and of course '\' escaping is the java syntax as well, we have to double it)
|
||||
"{!child of='*:* -_nest_path_:"+(parent_path + "/").replace("/","\\\\/") +"*'}"
|
||||
+ "(+bar +_nest_path_:" + parent_path.replace("/", "\\/") + ")");
|
||||
|
||||
} finally {
|
||||
req.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,7 +17,15 @@
|
|||
|
||||
package org.apache.solr.update;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -25,6 +33,7 @@ import org.apache.solr.common.SolrInputDocument;
|
|||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.update.processor.NestedUpdateProcessorFactory;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Rule;
|
||||
|
@ -209,4 +218,339 @@ public class TestNestedUpdateProcessor extends SolrTestCaseJ4 {
|
|||
updateJ(cmd, null);
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
/**
|
||||
* Randomized test to look for flaws in the documented approach for building "safe" values of the
|
||||
* <code>of</code> / <code>which</code> params in the <code>child</code> / <code>parent</code> QParsers
|
||||
* when a specific <code>_nest_path_</code> is desired
|
||||
*
|
||||
* @see <a href="https://issues.apache.org/jira/browse/SOLR-14687">SOLR-14687</a>
|
||||
*/
|
||||
public void testRandomNestPathQueryFiltering() throws Exception {
|
||||
|
||||
// First: build a bunch of complex randomly nested documents, with random "nest paths"
|
||||
// re-use the same "path segments" at various levels of nested, so as to confuse things even more
|
||||
final RandomNestedDocModel docs = new RandomNestedDocModel();
|
||||
for (int i = 0; i < 50; i++) {
|
||||
final SolrInputDocument rootDoc = docs.buildRandomDoc();
|
||||
assertU(adoc(rootDoc));
|
||||
}
|
||||
assertU(commit());
|
||||
|
||||
// now do some systematic parent/child queries.
|
||||
// we're checking both for "parser errors" (ie: children matching "parent filter")
|
||||
// as well as that the test_path_s of all matching docs meets our expectations
|
||||
|
||||
// *:* w/ parent parser...
|
||||
// starts at "root" parent_path and recurses until we get no (expected) results
|
||||
assertTrue(// we expected at least one query for every "real" path,
|
||||
// but there will be more because we'll try lots of sub-paths that have no docs
|
||||
docs.numDocsDescendentFromPath.keySet().size()
|
||||
< docs.recursiveCheckParentQueryOfAllChildren(Collections.<String>emptyList()));
|
||||
// sanity check: path that is garunteed not to exist...
|
||||
assertEquals(1, docs.recursiveCheckParentQueryOfAllChildren(Arrays.asList("xxx", "yyy")));
|
||||
|
||||
// *:* w/ child parser...
|
||||
// starts at "root" parent_path and recurses until we get no (expected) results
|
||||
assertTrue(// we expected at least one query for every "real" path,
|
||||
// but there will be more because we'll try lots of sub-paths that have no docs
|
||||
docs.numDocsWithPathWithKids.keySet().size()
|
||||
< docs.recursiveCheckChildQueryOfAllParents(Collections.<String>emptyList()));
|
||||
// sanity check: path that is garunteed not to exist...
|
||||
assertEquals(1, docs.recursiveCheckChildQueryOfAllParents(Arrays.asList("xxx", "yyy")));
|
||||
|
||||
// quering against individual child ids w/ both parent & child parser...
|
||||
docs.checkParentAndChildQueriesOfEachDocument();
|
||||
}
|
||||
|
||||
private static class RandomNestedDocModel {
|
||||
public static final List<String> PATH_ELEMENTS = Arrays.asList("aa", "bb", "cc", "dd");
|
||||
|
||||
private final Map<String,SolrInputDocument> allDocs = new HashMap<>();
|
||||
|
||||
public final Map<String,Integer> numDocsDescendentFromPath = new HashMap<>();
|
||||
public final Map<String,Integer> numDocsWithPathWithKids = new HashMap<>();
|
||||
|
||||
private int idCounter = 0;
|
||||
|
||||
public synchronized SolrInputDocument buildRandomDoc() {
|
||||
return buildRandomDoc(null, Collections.<String>emptyList(), 15);
|
||||
}
|
||||
private static String joinPath(List<String> test_path) {
|
||||
return "/" + String.join("/", test_path);
|
||||
}
|
||||
private synchronized SolrInputDocument buildRandomDoc(SolrInputDocument parent,
|
||||
List<String> test_path,
|
||||
int maxDepthAndBreadth) {
|
||||
final String path_string = joinPath(test_path);
|
||||
final String id = "" + (++idCounter);
|
||||
maxDepthAndBreadth--;
|
||||
final SolrInputDocument doc = sdoc
|
||||
("id", id,
|
||||
// may change, but we want it 0 even if we never add any
|
||||
"num_direct_kids_s", "0",
|
||||
// conceptually matches _nest_path_ but should be easier to make assertions about (no inline position #s)
|
||||
"test_path_s", path_string);
|
||||
if (null != parent) {
|
||||
// order matters: if we add the Collection first, SolrInputDocument will try to reuse it
|
||||
doc.addField("ancestor_ids_ss", parent.getFieldValue("id"));
|
||||
if (parent.containsKey("ancestor_ids_ss")) { // sigh: getFieldValues returns null, not empty collection
|
||||
doc.addField("ancestor_ids_ss", parent.getFieldValues("ancestor_ids_ss"));
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < test_path.size(); i++) {
|
||||
// NOTE: '<' not '<=" .. we only includes paths we are descendents of, not our full path...
|
||||
numDocsDescendentFromPath.merge(joinPath(test_path.subList(0, i)), 1, Math::addExact);
|
||||
}
|
||||
|
||||
if (0 < maxDepthAndBreadth) {
|
||||
final int numDirectKids = TestUtil.nextInt(random(), 0, Math.min(4, maxDepthAndBreadth));
|
||||
doc.setField("num_direct_kids_s", "" + numDirectKids);
|
||||
if (0 < numDirectKids) {
|
||||
numDocsWithPathWithKids.merge(path_string, 1, Math::addExact);
|
||||
}
|
||||
maxDepthAndBreadth -= numDirectKids;
|
||||
for (int i = 0; i < numDirectKids; i++) {
|
||||
final String kidType = PATH_ELEMENTS.get(random().nextInt(PATH_ELEMENTS.size()));
|
||||
final List<String> kid_path = new ArrayList<>(test_path);
|
||||
kid_path.add(kidType);
|
||||
final SolrInputDocument kid = buildRandomDoc(doc, kid_path, maxDepthAndBreadth);
|
||||
doc.addField(kidType, kid);
|
||||
// order matters: if we add the Collection first, SolrInputDocument will try to reuse it
|
||||
doc.addField("descendent_ids_ss", kid.getFieldValue("id"));
|
||||
if (kid.containsKey("descendent_ids_ss")) { // sigh: getFieldValues returns null, not empty collection
|
||||
doc.addField("descendent_ids_ss", kid.getFieldValues("descendent_ids_ss"));
|
||||
}
|
||||
}
|
||||
}
|
||||
allDocs.put(id, doc);
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Loops over the 'model' of every document we've indexed, asserting that
|
||||
* parent/child queries wrapping an '<code>id:foo</code> using various paths
|
||||
* match the expected ancestors/descendents
|
||||
*/
|
||||
public void checkParentAndChildQueriesOfEachDocument() {
|
||||
assertFalse("You didn't build any docs", allDocs.isEmpty());
|
||||
|
||||
for (String doc_id : allDocs.keySet()) {
|
||||
final String doc_path = allDocs.get(doc_id).getFieldValue("test_path_s").toString();
|
||||
|
||||
if ( ! doc_path.equals("/") ) {
|
||||
|
||||
// doc_id -> descdentId must have at least one ancestor (since it's not a root level document)
|
||||
final String descendentId = doc_id;
|
||||
assert allDocs.get(descendentId).containsKey("ancestor_ids_ss");
|
||||
final List<Object> allAncestorIds = new ArrayList<>(allDocs.get(descendentId).getFieldValues("ancestor_ids_ss"));
|
||||
|
||||
// pick a random ancestor to use in our testing...
|
||||
final String ancestorId = allAncestorIds.get(random().nextInt(allAncestorIds.size())).toString();
|
||||
final String ancestor_path = allDocs.get(ancestorId).getFieldValue("test_path_s").toString();
|
||||
|
||||
final Collection<Object> allOfAncestorsDescendentIds
|
||||
= allDocs.get(ancestorId).getFieldValues("descendent_ids_ss");
|
||||
|
||||
assertTrue("Sanity check " + ancestorId + " ancestor of " + descendentId,
|
||||
allOfAncestorsDescendentIds.contains(descendentId));
|
||||
|
||||
// now we should be able to assert that a 'parent' query wrapped around a query for the descendentId
|
||||
// using the ancestor_path should match exactly one doc: our ancestorId...
|
||||
assertQ(req(parentQueryMaker(ancestor_path, "id:" + descendentId),
|
||||
"_trace_path_tested", ancestor_path,
|
||||
"fl", "id",
|
||||
"indent", "true")
|
||||
, "//result/@numFound=1"
|
||||
, "//doc/str[@name='id'][.='"+ancestorId+"']"
|
||||
);
|
||||
|
||||
// meanwhile, a 'child' query wrapped arround a query for the ancestorId, using the ancestor_path,
|
||||
// should match all of it's descendents (for simplicity we'll check just the numFound and the
|
||||
// 'descendentId' we started with)
|
||||
assertQ(req(childQueryMaker(ancestor_path, "id:" + ancestorId),
|
||||
"_trace_path_tested", ancestor_path,
|
||||
"rows", "9999",
|
||||
"fl", "id",
|
||||
"indent", "true")
|
||||
, "//result/@numFound="+allOfAncestorsDescendentIds.size()
|
||||
, "//doc/str[@name='id'][.='"+descendentId+"']"
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
// regardless of wether doc_id has an ancestor or not, a 'parent' query with a path that isn't a
|
||||
// prefix of the path of the (child) doc_id in the wrapped query should match 0 docs w/o failing
|
||||
assertQ(req(parentQueryMaker("/xxx/yyy", "id:" + doc_id),
|
||||
"_trace_path_tested", "/xxx/yyy",
|
||||
"indent", "true")
|
||||
, "//result/@numFound=0");
|
||||
|
||||
// likewise: a 'child' query wrapped around a query for our doc_id (regardless of wether if has
|
||||
// any kids), using a path that doesn't start with the same prefix as doc_id, should match 0
|
||||
// docs w/o failing
|
||||
assertQ(req(childQueryMaker("/xxx/yyy", "id:" + doc_id),
|
||||
"_trace_path_tested", "/xxx/yyy",
|
||||
"indent", "true")
|
||||
, "//result/@numFound=0");
|
||||
|
||||
// lastly: wrapping a child query around a query for our doc_id, using a path that "extends"
|
||||
// the doc_id's path should always get 0 results if that path doesn't match any actual kids
|
||||
// (regardless of wether doc_id has any children/descendents)
|
||||
assertQ(req(childQueryMaker(doc_path + "/xxx/yyy", "id:" + doc_id),
|
||||
"_trace_path_tested", doc_path + "/xxx/yyy",
|
||||
"indent", "true")
|
||||
, "//result/@numFound=0");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* recursively check path permutations using <code>*:*</code> inner query, asserting that the
|
||||
* only docs matched have the expected path, and at least one kid (since this is the "parents" parser)
|
||||
*
|
||||
* (using <code>*:*</code> as our inner query keeps the validation simple and also helps stress out
|
||||
* risk of matching incorrect docs if the 'which' param is wrong)
|
||||
*
|
||||
* @return total number of queries checked (assuming no assertion failures)
|
||||
*/
|
||||
public int recursiveCheckParentQueryOfAllChildren(List<String> parent_path) {
|
||||
final String p = joinPath(parent_path);
|
||||
final int expectedParents = numDocsWithPathWithKids.getOrDefault(p, 0);
|
||||
assertQ(req(parentQueryMaker(p, "*:*"),
|
||||
"rows", "9999",
|
||||
"_trace_path_tested", p,
|
||||
"fl", "test_path_s,num_direct_kids_s",
|
||||
"indent", "true")
|
||||
, "//result/@numFound="+expectedParents
|
||||
, "count(//doc)="+expectedParents
|
||||
, "count(//doc/str[@name='test_path_s'][.='"+p+"'])="+expectedParents
|
||||
, "0=count(//doc/str[@name='num_direct_kids_s'][.='0'])"
|
||||
);
|
||||
int numChecked = 1;
|
||||
|
||||
// no point in recursing on the current path if we already have no results found...
|
||||
if (0 < expectedParents) {
|
||||
for (String next : PATH_ELEMENTS) {
|
||||
final List<String> next_path = new ArrayList<>(parent_path);
|
||||
next_path.add(next);
|
||||
numChecked += recursiveCheckParentQueryOfAllChildren(next_path);
|
||||
}
|
||||
}
|
||||
return numChecked;
|
||||
}
|
||||
|
||||
/**
|
||||
* This implements the "safe query based on parent path" rules we're sanity checking.
|
||||
*
|
||||
* @param parent_path the nest path of the parents to consider
|
||||
* @param inner_child_query the specific children whose ancestors we are looking for, must be simple string <code>*:*</code> or <code>id:foo</code>
|
||||
*/
|
||||
private SolrParams parentQueryMaker(String parent_path, String inner_child_query) {
|
||||
assertValidPathSytax(parent_path);
|
||||
final boolean verbose = random().nextBoolean();
|
||||
|
||||
if (parent_path.equals("/")) {
|
||||
if (verbose) {
|
||||
return params("q", "{!parent which=$parent_filt v=$child_q}",
|
||||
"parent_filt", "(*:* -_nest_path_:*)",
|
||||
"child_q", "(+" + inner_child_query + " +_nest_path_:*)");
|
||||
} else {
|
||||
return params("q", "{!parent which='(*:* -_nest_path_:*)'}(+" + inner_child_query + " +_nest_path_:*)");
|
||||
}
|
||||
} // else...
|
||||
|
||||
if (verbose) {
|
||||
final String path = parent_path + "/";
|
||||
return params("q", "{!parent which=$parent_filt v=$child_q}",
|
||||
"parent_filt", "(*:* -{!prefix f='_nest_path_' v='"+path+"'})",
|
||||
"child_q", "(+" + inner_child_query + " +{!prefix f='_nest_path_' v='"+path+"'})");
|
||||
} else {
|
||||
// '/' has to be escaped other wise it will be treated as a regex query...
|
||||
// (and of course '\' escaping is the java syntax as well, we have to double it)
|
||||
final String path = (parent_path + "/").replace("/", "\\/");
|
||||
// ...and when used inside the 'which' param it has to be escaped *AGAIN* because of
|
||||
// the "quoted" localparam evaluation layer...
|
||||
return params("q", "{!parent which='(*:* -_nest_path_:" + path.replace("\\/","\\\\/") + "*)'}"
|
||||
+ "(+" + inner_child_query + " +_nest_path_:" + path + "*)");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* recursively check path permutations using <code>*:*</code> inner query, asserting that the
|
||||
* only docs matched have paths that include the specified path as a (strict) prefix
|
||||
*
|
||||
* (using <code>*:*</code> as our inner query keeps the validation simple and also helps stress out
|
||||
* risk of matching incorrect docs if the 'of' param is wrong)
|
||||
*
|
||||
* @return total number of queries checked (assuming no assertion failures)
|
||||
*/
|
||||
public int recursiveCheckChildQueryOfAllParents(List<String> parent_path) {
|
||||
final String p = joinPath(parent_path);
|
||||
final int expectedMatches = numDocsDescendentFromPath.getOrDefault(p, 0);
|
||||
assertQ(req(childQueryMaker(p, "*:*"),
|
||||
"rows", "9999",
|
||||
"_trace_path_tested", p,
|
||||
"fl", "test_path_s",
|
||||
"indent", "true")
|
||||
, "//result/@numFound="+expectedMatches
|
||||
, "count(//doc)="+expectedMatches
|
||||
, "count(//doc/str[@name='test_path_s'][starts-with(., '"+p+"')])="+expectedMatches
|
||||
);
|
||||
int numChecked = 1;
|
||||
|
||||
// no point in recursing on the current path if we already have no results found...
|
||||
if (0 < expectedMatches) {
|
||||
for (String next : PATH_ELEMENTS) {
|
||||
final List<String> next_path = new ArrayList<>(parent_path);
|
||||
next_path.add(next);
|
||||
numChecked += recursiveCheckChildQueryOfAllParents(next_path);
|
||||
}
|
||||
}
|
||||
return numChecked;
|
||||
}
|
||||
|
||||
/**
|
||||
* This implements the "safe query based on parent path" rules we're sanity checking.
|
||||
*
|
||||
* @param parent_path the nest path of the parents to consider
|
||||
* @param inner_parent_query the specific parents whose descendents we are looking for, must be simple string <code>*:*</code> or <code>id:foo</code>
|
||||
*/
|
||||
private SolrParams childQueryMaker(String parent_path, String inner_parent_query) {
|
||||
assertValidPathSytax(parent_path);
|
||||
final boolean verbose = random().nextBoolean();
|
||||
|
||||
if (parent_path.equals("/")) {
|
||||
if (verbose) {
|
||||
return params("q", "{!child of=$parent_filt v=$parent_q})",
|
||||
"parent_filt", "(*:* -_nest_path_:*)",
|
||||
"parent_q", "(+" + inner_parent_query + " -_nest_path_:*)");
|
||||
} else {
|
||||
return params("q", "{!child of='(*:* -_nest_path_:*)'}(+" + inner_parent_query + " -_nest_path_:*)");
|
||||
}
|
||||
} // else...
|
||||
|
||||
if (verbose) {
|
||||
return params("q", "{!child of=$parent_filt v=$parent_q})",
|
||||
"parent_filt", "(*:* -{!prefix f='_nest_path_' v='"+parent_path+"/'})",
|
||||
"parent_q", "(+" + inner_parent_query + " +{!field f='_nest_path_' v='"+parent_path+"'})");
|
||||
} else {
|
||||
// '/' has to be escaped other wise it will be treated as a regex query...
|
||||
// (and of course '\' escaping is the java syntax as well, we have to double it)
|
||||
final String exact_path = parent_path.replace("/", "\\/");
|
||||
// ...and when used inside the 'which' param it has to be escaped *AGAIN* because of
|
||||
// the "quoted" localparam evaluation layer...
|
||||
final String prefix_path = (parent_path + "/").replace("/","\\\\/");
|
||||
return params("q", "{!child of='(*:* -_nest_path_:"+prefix_path+"*)'}"
|
||||
+ "(+" + inner_parent_query + " +_nest_path_:" + exact_path + ")");
|
||||
}
|
||||
}
|
||||
|
||||
private void assertValidPathSytax(String path) {
|
||||
assert path.startsWith("/");
|
||||
assert (1 == path.length()) ^ ! path.endsWith("/");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
= Indexing Nested Child Documents
|
||||
:solr-root-path: ../../
|
||||
:example-source-dir: {solr-root-path}solrj/src/test/org/apache/solr/client/ref_guide_examples/
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
|
@ -17,137 +19,478 @@
|
|||
// under the License.
|
||||
|
||||
Solr supports indexing nested documents, described here, and ways to <<searching-nested-documents.adoc#searching-nested-documents,search and retrieve>> them very efficiently.
|
||||
By way of example, nested documents in Solr can be used to bind a blog post (parent document) with comments (child documents)
|
||||
-- or products as parent documents and sizes, colors, or other variations as child documents. +
|
||||
The parent with all children is referred to as a nested document or "block" and it explains some of the nomenclature of related features.
|
||||
By way of examples: nested documents in Solr can be used to bind a blog post (parent document) with comments (child documents) -- or as a way to model major product lines as parent documents, with multiple types of child documents representing individual SKUs (with unique sizes / colors) and supporting documentation (either directly nested under the products, or under individual SKUs).
|
||||
The "top most" parent with all children is referred to as a "root level" document or "block document" and it explains some of the nomenclature of related features.
|
||||
At query time, the <<other-parsers.adoc#block-join-query-parsers,Block Join Query Parsers>> can search these relationships,
|
||||
and the `<<transforming-result-documents.adoc#child-childdoctransformerfactory,[child]>>` Document Transformer can attach child documents to the result documents.
|
||||
In terms of performance, indexing the relationships between documents usually yields much faster queries than an equivalent "query time join",
|
||||
and the `<<transforming-result-documents.adoc#child-childdoctransformerfactory,[child]>>` Document Transformer can attach child (or other "descendent") documents to the result documents.
|
||||
In terms of performance, indexing the relationships between documents usually yields much faster queries than an equivalent "<<other-parsers#join-query-parser,query time join>>",
|
||||
since the relationships are already stored in the index and do not need to be computed.
|
||||
However, nested documents are less flexible than query time joins as they impose rules that some applications may not be able to accept.
|
||||
Nested documents may be indexed via either the XML or JSON data syntax, and are also supported by <<using-solrj.adoc#using-solrj,SolrJ>> with javabin.
|
||||
|
||||
|
||||
[CAUTION]
|
||||
====
|
||||
.Re-Indexing Considerations
|
||||
With the exception of in-place updates, <<#maintaining-integrity-with-updates-and-deletes,blocks of nested documents must be updated/deleted together>>. Modifying or replacing individual child documents requires re-indexing of the entire block (either explicitly/externally, or under the covers inside of Solr). For some applications this may result in a lot of extra indexing overhead and may not be worth the performance gains at query time.
|
||||
====
|
||||
|
||||
[#example-indexing-syntax]
|
||||
== Example Indexing Syntax: Pseudo-Fields
|
||||
|
||||
This example shows what it looks like to index two root level "product" documents, each containing two different types of child documents specified in "pseudo-fields": "skus" and "manuals". Two of the "sku" type documents have their own nested child "manuals" documents...
|
||||
|
||||
[NOTE]
|
||||
====
|
||||
.Limitation
|
||||
With the exception of in-place updates, the whole block must be updated or deleted together, not separately. For some applications this may result in tons of extra indexing and thus may be a deal-breaker.
|
||||
Even though the child documents in these examples are provided syntactically as field values, this is simply a matter of syntax and as such `skus` and `manuals` are not actual fields in the documents. Consequently, these field names need not be defined in the schema and probably shouldn't be as it would be confusing. There is no "child document" field type.
|
||||
====
|
||||
|
||||
== Schema Configuration
|
||||
//
|
||||
// DO NOT MODIFY THESE EXAMPLE DOCS WITHOUT REVIEWING ALL PAGES THAT INCLUDE/REFER BACK TO THESE EXAMPLES
|
||||
// INCLUDING THE SEMI-EQUIVALENT ANONYMOUS CHILDREN EXAMPLE AT THE BOTTOM OF THIS PAGE
|
||||
//
|
||||
[.dynamic-tabs]
|
||||
--
|
||||
[example.tab-pane#json]
|
||||
====
|
||||
[.tab-label]*JSON*
|
||||
// tag::sample-indexing-deeply-nested-documents[]
|
||||
[source,json]
|
||||
----
|
||||
[{ "id": "P11!prod",
|
||||
"name_s": "Swingline Stapler",
|
||||
"description_t": "The Cadillac of office staplers ...",
|
||||
"skus": [ { "id": "P11!S21",
|
||||
"color_s": "RED",
|
||||
"price_i": 42,
|
||||
"manuals": [ { "id": "P11!D41",
|
||||
"name_s": "Red Swingline Brochure",
|
||||
"pages_i":1,
|
||||
"content_t": "..."
|
||||
} ]
|
||||
},
|
||||
{ "id": "P11!S31",
|
||||
"color_s": "BLACK",
|
||||
"price_i": 3
|
||||
} ],
|
||||
"manuals": [ { "id": "P11!D51",
|
||||
"name_s": "Quick Reference Guide",
|
||||
"pages_i":1,
|
||||
"content_t": "How to use your stapler ..."
|
||||
},
|
||||
{ "id": "P11!D61",
|
||||
"name_s": "Warranty Details",
|
||||
"pages_i":42,
|
||||
"content_t": "... lifetime guarantee ..."
|
||||
} ]
|
||||
},
|
||||
{ "id": "P22!prod",
|
||||
"name_s": "Mont Blanc Fountain Pen",
|
||||
"description_t": "A Premium Writing Instrument ...",
|
||||
"skus": [ { "id": "P22!S22",
|
||||
"color_s": "RED",
|
||||
"price_i": 89,
|
||||
"manuals": [ { "id": "P22!D42",
|
||||
"name_s": "Red Mont Blanc Brochure",
|
||||
"pages_i":1,
|
||||
"content_t": "..."
|
||||
} ]
|
||||
},
|
||||
{ "id": "P22!S32",
|
||||
"color_s": "BLACK",
|
||||
"price_i": 67
|
||||
} ],
|
||||
"manuals": [ { "id": "P22!D52",
|
||||
"name_s": "How To Use A Pen",
|
||||
"pages_i":42,
|
||||
"content_t": "Start by removing the cap ..."
|
||||
} ]
|
||||
} ]
|
||||
----
|
||||
// end::sample-indexing-deeply-nested-documents[]
|
||||
|
||||
* The schema must include an indexed field `\_root_`. Solr automatically populates this with the value of the top/parent ID. +
|
||||
`<field name="\_root_" type="string" indexed="true" stored="false" docValues="false" />`
|
||||
** `\_root_` must be set either as stored (stored="true") or doc values (docValues="true") to enable
|
||||
<<updating-parts-of-documents#updating-child-documents, atomic updates of nested documents>>. Also, beware of `uniqueBlock(\_root_)` <<json-facet-api#stat-facet-functions,field type limitation>>, if you plan to use one.
|
||||
* `\_nest_path_` is populated by Solr automatically with the path of the document in the hierarchy for non-root documents. This field is optional. +
|
||||
`<fieldType name="\_nest_path_" class="solr.NestPathField" />
|
||||
<field name="\_nest_path_" type="_nest_path_" />`
|
||||
* `\_nest_parent_` is populated by Solr automatically to store the ID of each document's parent document (if there is one). This field is optional. +
|
||||
`<field name="\_nest_parent_" type="string" indexed="true" stored="true"/>`
|
||||
* Nested documents are very much documents in their own right even if certain nested documents hold different information from the parent.
|
||||
Therefore:
|
||||
** a field can only be configured one way no matter what sort of document uses it
|
||||
** it may be infeasible to use `required`
|
||||
** even child documents need a unique ID
|
||||
* Even though child documents are provided as field values syntactically and with SolrJ, it's a matter of syntax and it isn't an actual field in the schema.
|
||||
Consequently, the field need not be defined in the schema and probably shouldn't be as it would be confusing.
|
||||
There is no child document field type, at least not yet.
|
||||
|
||||
=== Rudimentary Root-only Schemas
|
||||
|
||||
These schemas do not contain any other nested related fields apart from `\_root_`.
|
||||
Many schemas in existence are this way simply because default configsets are this way, even if the application isn't using nested documents.
|
||||
If an application uses nested documents with such a schema, keep in mind that some related features aren't as effective since there is less information. Mainly the <<searching-nested-documents.adoc#child-doc-transformer,[child]>> transformer returns matching children in a flat list (not nested) and it's attached to the parent using the special field name `\_childDocuments_`.
|
||||
|
||||
With such a schema, typically you should have a field that differentiates a root doc from any nested children.
|
||||
However this isn't strictly necessary; so long as it's possible to write a query that can select only root documents somehow.
|
||||
Such a query is needed for the <<other-parsers.adoc#block-join-query-parsers,block join query parsers>> and <<searching-nested-documents.adoc#child-doc-transformer,[child]>> doc transformer to function.
|
||||
|
||||
=== XML Examples
|
||||
|
||||
Here are two documents and their child documents.
|
||||
It illustrates two styles of adding child documents: the first is associated via a field "comment" (preferred),
|
||||
and the second is done in the classic way now referred to as an "anonymous" or "unlabelled" child document.
|
||||
This field label relationship is available to the URP chain in Solr but is ultimately discarded unless the special fields are defined.
|
||||
[CAUTION]
|
||||
=====
|
||||
The <<uploading-data-with-index-handlers#json-update-convenience-paths,`/update/json/docs` convenience path>> will automatically flatten complex JSON documents by default -- so to index nested JSON documents make sure to use `/update`.
|
||||
=====
|
||||
====
|
||||
|
||||
[example.tab-pane#xml]
|
||||
====
|
||||
[.tab-label]*XML*
|
||||
[source,xml]
|
||||
----
|
||||
<add>
|
||||
<doc>
|
||||
<field name="ID">1</field>
|
||||
<field name="title">Solr adds block join support</field>
|
||||
<field name="content_type">parentDocument</field>
|
||||
<field name="content">
|
||||
<field name="id">P11!prod</field>
|
||||
<field name="name_s">Swingline Stapler</field>
|
||||
<field name="description_t">The Cadillac of office staplers ...</field>
|
||||
<field name="skus">
|
||||
<doc>
|
||||
<field name="ID">2</field>
|
||||
<field name="comments">SolrCloud supports it too!</field>
|
||||
<field name="id">P11!S21</field>
|
||||
<field name="color_s">RED</field>
|
||||
<field name="price_i">42</field>
|
||||
<field name="manuals">
|
||||
<doc>
|
||||
<field name="id">P11!D41</field>
|
||||
<field name="name_s">Red Swingline Brochure</field>
|
||||
<field name="pages_i">1</field>
|
||||
<field name="content_t">...</field>
|
||||
</doc>
|
||||
</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="ID">3</field>
|
||||
<field name="title">New Lucene and Solr release is out</field>
|
||||
<field name="content_type">parentDocument</field>
|
||||
<field name="id">P11!S31</field>
|
||||
<field name="color_s">BLACK</field>
|
||||
<field name="price_i">3</field>
|
||||
</doc>
|
||||
</field>
|
||||
<field name="manuals">
|
||||
<doc>
|
||||
<field name="ID">4</field>
|
||||
<field name="comments">Lots of new features</field>
|
||||
<field name="id">P11!D51</field>
|
||||
<field name="name_s">Quick Reference Guide</field>
|
||||
<field name="pages_i">1</field>
|
||||
<field name="content_t">How to use your stapler ...</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">P11!D61</field>
|
||||
<field name="name_s">Warranty Details</field>
|
||||
<field name="pages_i">42</field>
|
||||
<field name="content_t">... lifetime guarantee ...</field>
|
||||
</doc>
|
||||
</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">P22!prod</field>
|
||||
<field name="name_s">Mont Blanc Fountain Pen</field>
|
||||
<field name="description_t">A Premium Writing Instrument ...</field>
|
||||
<field name="skus">
|
||||
<doc>
|
||||
<field name="id">P22!S22</field>
|
||||
<field name="color_s">RED</field>
|
||||
<field name="price_i">89</field>
|
||||
<field name="manuals">
|
||||
<doc>
|
||||
<field name="id">P22!D42</field>
|
||||
<field name="name_s">Red Mont Blanc Brochure</field>
|
||||
<field name="pages_i">1</field>
|
||||
<field name="content_t">...</field>
|
||||
</doc>
|
||||
</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">P22!S32</field>
|
||||
<field name="color_s">BLACK</field>
|
||||
<field name="price_i">67</field>
|
||||
</doc>
|
||||
</field>
|
||||
<field name="manuals">
|
||||
<doc>
|
||||
<field name="id">P22!D52</field>
|
||||
<field name="name_s">How To Use A Pen</field>
|
||||
<field name="pages_i">42</field>
|
||||
<field name="content_t">Start by removing the cap ...</field>
|
||||
</doc>
|
||||
</field>
|
||||
</doc>
|
||||
</add>
|
||||
----
|
||||
====
|
||||
|
||||
[example.tab-pane#solrj]
|
||||
====
|
||||
[.tab-label]*SolrJ*
|
||||
[source,java,indent=0]
|
||||
----
|
||||
include::{example-source-dir}IndexingNestedDocuments.java[tag=nest-path]
|
||||
----
|
||||
====
|
||||
--
|
||||
|
||||
|
||||
== Schema Configuration
|
||||
|
||||
Indexing nested documents _requires_ an indexed field named `\_root_`:
|
||||
|
||||
[source,xml]
|
||||
----
|
||||
<field name="_root_" type="string" indexed="true" />
|
||||
----
|
||||
|
||||
Solr automatically populates this field in every nested document with the `id` value of the top most parent document in the block.
|
||||
|
||||
|
||||
There are several additional schema considerations that should be considered for people who wish to use nested documents:
|
||||
|
||||
* Nested child documents are very much documents in their own right even if certain nested documents hold different information from the parent. Therefore:
|
||||
** All field names in the schema can only be configured in one way -- different types of child documents cannot have the same field name configured in different ways.
|
||||
** It may be infeasible to use `required` for any field names that aren't required for all types of documents.
|
||||
** Even child documents need a _globally_ unique `id`.
|
||||
* `\_root_` must be configured to either be stored (`stored="true"`) or use doc values (`docValues="true"`) to enable <<updating-parts-of-documents#updating-child-documents,atomic updates of nested documents>>.
|
||||
** Also, beware of `uniqueBlock(\_root_)` <<json-facet-api#stat-facet-functions,field type limitation>>, if you plan to use one.
|
||||
* `\_nest_path_` is an optional field that (if defined) will be populated by Solr automatically with the ancestor path of each non-root document.
|
||||
+
|
||||
[source,xml]
|
||||
----
|
||||
<fieldType name="_nest_path_" class="solr.NestPathField" />
|
||||
<field name="_nest_path_" type="_nest_path_" />
|
||||
----
|
||||
** This field is necessary if you wish to use <<updating-parts-of-documents#updating-child-documents,atomic updates of nested documents>>
|
||||
** This field is necessary in order for Solr to properly record & reconstruct the nested relationship of documents when using the `<<searching-nested-documents.adoc#child-doc-transformer,[child]>>` doc transformer.
|
||||
*** If this field does not exist, the `[child]` transformer will return all descendent child documents as a flattened list -- just as if they had been <<#indexing-anonymous-children,indexed as anonymous children>>.
|
||||
** If you do not use `\_nest_path_` it is strongly recommended that every document have some field that differentiates root documents from their nested children -- and differentiates different "types" of child documents. This is not strictly necessary, so long as it's possible to write a "filter" query that can be used to isolate and select only parent documents for use in the <<other-parsers.adoc#block-join-query-parsers,block join query parsers>> and <<searching-nested-documents.adoc#child-doc-transformer,[child]>> doc transformer.
|
||||
* `\_nest_parent_` is an optional field that (if defined) will be populated by Solr automatically to store the `id` of each document's _immediate_ parent document (if there is one).
|
||||
+
|
||||
[source,xml]
|
||||
----
|
||||
<field name="_nest_parent_" type="string" indexed="true" stored="true" />
|
||||
----
|
||||
|
||||
[TIP]
|
||||
====
|
||||
When using SolrCloud it is a _VERY_ good idea to use <<shards-and-indexing-data-in-solrcloud#document-routing,prefix based compositeIds>> with a common prefix for all documents in the block. This makes it much easier to apply <<updating-parts-of-documents#updating-child-documents,atomic updates to individual child documents>>.
|
||||
====
|
||||
|
||||
|
||||
== Maintaining Integrity with Updates and Deletes
|
||||
|
||||
Blocks of nested documents can be modified simply by adding/replacing the root document with more or fewer child/descendent documents as an application desires. This can either be done explicitly/externally by an indexing client completely re-indexing the root level document, or internally by Solr when a client uses <<updating-parts-of-documents#updating-child-documents,atomic updates>> to modify child documents. This aspect isn't different than updating any normal document except that Solr takes care to ensure that all related child documents of the existing version get deleted.
|
||||
|
||||
Clients should however be very careful to *never* add a root document that has the same `id` of a child document -- or vice-versa. Solr does not prevent clients from attempting this, but *_it will violate integrity assumptions that Solr expects._*
|
||||
|
||||
To delete an entire block of documents, you can simply delete-by-ID using the `id` of the root document. Delete-by-ID will not work with the `id` of a child document, since only root document IDs are considered. (Instead, use <<updating-parts-of-documents#updating-child-documents,atomic updates>> to remove the child document from its parent.)
|
||||
|
||||
If you use Solr's delete-by-query APIs, you *MUST* be careful to ensure that any deletion query is structured to ensure no descendent children remain of any documents that are being deleted. *_Doing otherwise will violate integrity assumptions that Solr expects._*
|
||||
|
||||
|
||||
|
||||
|
||||
== Indexing Anonymous Children
|
||||
|
||||
Although not recommended, it is also possible to index child documents "anonymously":
|
||||
|
||||
[.dynamic-tabs]
|
||||
--
|
||||
[example.tab-pane#anon_json]
|
||||
====
|
||||
[.tab-label]*JSON*
|
||||
[source,json]
|
||||
----
|
||||
[{ "id": "P11!prod",
|
||||
"name_s": "Swingline Stapler",
|
||||
"type_s": "PRODUCT",
|
||||
"description_t": "The Cadillac of office staplers ...",
|
||||
"_childDocuments_": [
|
||||
{ "id": "P11!S21",
|
||||
"type_s": "SKU",
|
||||
"color_s": "RED",
|
||||
"price_i": 42,
|
||||
"_childDocuments_": [
|
||||
{ "id": "P11!D41",
|
||||
"type_s": "MANUAL",
|
||||
"name_s": "Red Swingline Brochure",
|
||||
"pages_i":1,
|
||||
"content_t": "..."
|
||||
} ]
|
||||
},
|
||||
{ "id": "P11!S31",
|
||||
"type_s": "SKU",
|
||||
"color_s": "BLACK",
|
||||
"price_i": 3
|
||||
},
|
||||
{ "id": "P11!D51",
|
||||
"type_s": "MANUAL",
|
||||
"name_s": "Quick Reference Guide",
|
||||
"pages_i":1,
|
||||
"content_t": "How to use your stapler ..."
|
||||
},
|
||||
{ "id": "P11!D61",
|
||||
"type_s": "MANUAL",
|
||||
"name_s": "Warranty Details",
|
||||
"pages_i":42,
|
||||
"content_t": "... lifetime guarantee ..."
|
||||
}
|
||||
]
|
||||
} ]
|
||||
----
|
||||
====
|
||||
|
||||
[example.tab-pane#anon_xml]
|
||||
====
|
||||
[.tab-label]*XML*
|
||||
[source,xml]
|
||||
----
|
||||
<add>
|
||||
<doc>
|
||||
<field name="id">P11!prod</field>
|
||||
<field name="type_s">PRODUCT</field>
|
||||
<field name="name_s">Swingline Stapler</field>
|
||||
<field name="description_t">The Cadillac of office staplers ...</field>
|
||||
<doc>
|
||||
<field name="id">P11!S21</field>
|
||||
<field name="type_s">SKU</field>
|
||||
<field name="color_s">RED</field>
|
||||
<field name="price_i">42</field>
|
||||
<doc>
|
||||
<field name="id">P11!D41</field>
|
||||
<field name="type_s">MANUAL</field>
|
||||
<field name="name_s">Red Swingline Brochure</field>
|
||||
<field name="pages_i">1</field>
|
||||
<field name="content_t">...</field>
|
||||
</doc>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">P11!S31</field>
|
||||
<field name="type_s">SKU</field>
|
||||
<field name="color_s">BLACK</field>
|
||||
<field name="price_i">3</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">P11!D51</field>
|
||||
<field name="type_s">MANUAL</field>
|
||||
<field name="name_s">Quick Reference Guide</field>
|
||||
<field name="pages_i">1</field>
|
||||
<field name="content_t">How to use your stapler ...</field>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">P11!D61</field>
|
||||
<field name="type_s">MANUAL</field>
|
||||
<field name="name_s">Warranty Details</field>
|
||||
<field name="pages_i">42</field>
|
||||
<field name="content_t">... lifetime guarantee ...</field>
|
||||
</doc>
|
||||
</doc>
|
||||
</add>
|
||||
----
|
||||
====
|
||||
|
||||
In this example, we have indexed the parent documents with the field `content_type`, which has the value "parentDocument".
|
||||
We could have also used a boolean field, such as `isParent`, with a value of "true", or any other similar approach.
|
||||
|
||||
=== JSON Examples
|
||||
|
||||
This example is equivalent to the XML example above.
|
||||
Again, the field labelled relationship is preferred.
|
||||
The labelled relationship here is one child document but could have been wrapped in array brackets.
|
||||
For the anonymous relationship, note the special `\_childDocuments_` key whose contents must be an array of child documents.
|
||||
|
||||
[source,json]
|
||||
[example.tab-pane#anon_solrj]
|
||||
====
|
||||
[.tab-label]*SolrJ*
|
||||
[source,java,indent=0]
|
||||
----
|
||||
[
|
||||
{
|
||||
"ID": "1",
|
||||
"title": "Solr adds block join support",
|
||||
"content_type": "parentDocument",
|
||||
"comments": [{
|
||||
"ID": "2",
|
||||
"content": "SolrCloud supports it too!"
|
||||
},
|
||||
{
|
||||
"ID": "3",
|
||||
"content": "New filter syntax"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"ID": "4",
|
||||
"title": "New Lucene and Solr release is out",
|
||||
"content_type": "parentDocument",
|
||||
"_childDocuments_": [
|
||||
{
|
||||
"ID": "5",
|
||||
"comments": "Lots of new features"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
include::{example-source-dir}IndexingNestedDocuments.java[tag=anon-kids]
|
||||
----
|
||||
====
|
||||
|
||||
.Root-Only Mode
|
||||
[NOTE]
|
||||
In Root-only schemas, these two documents will result in the same docs being indexed (Root-only schemas do not honor nested relationships).
|
||||
When queried, child docs will be appended to the _childDocuments_ field/key.
|
||||
--
|
||||
|
||||
=== Important: Maintaining Integrity with Updates and Deletes
|
||||
|
||||
Nested documents (children and all) can simply be replaced by adding a new document with more or fewer documents as an application desires. This aspect isn't different than updating any normal document except that Solr takes care to ensure that all related child documents of the existing version get deleted.
|
||||
This simplified approach was common in older versions of Solr, and can still be used with "Root-Only" schemas that do not contain any other nested related fields apart from `\_root_`. (Many schemas in existence are this way simply because default configsets are this way, even if the application isn't using nested documents.)
|
||||
|
||||
Do *not* add a root document that has the same ID of a child document. _This will violate integrity assumptions that Solr expects._
|
||||
This approach should *NOT* be used when schemas include a `\_nest_path_` field, as the existence of that field triggers assumptions and changes in behavior in various query time functionality, such as the <<searching-nested-documents.adoc#child-doc-transformer,[child]>> transformer, that will not work when nested documents do not have any intrinsic "nested path" information.
|
||||
|
||||
To delete a nested document, you can delete it by the ID of the root document.
|
||||
If you try to use an ID of a child document, nothing will happen since only root document IDs are considered.
|
||||
If you use Solr's delete-by-query APIs, you *have to be careful* to ensure that no children remain of any documents that are being deleted. _Doing otherwise will violate integrity assumptions that Solr expects._
|
||||
The results of indexing anonymous nested children with a "Root-Only" schema are similar to what happens if you attempt to index "pseudo field" nested documents using a "Root-Only" schema. Notably: since there is no nested path information for the <<searching-nested-documents.adoc#child-doc-transformer,[child]>> transformer to use to reconstruct the structure of a block of documents, it returns all matching children as a flat list, similar in structure to how they were originally indexed:
|
||||
|
||||
|
||||
|
||||
[.dynamic-tabs]
|
||||
--
|
||||
[example.tab-pane#anon_json_out]
|
||||
====
|
||||
[.tab-label]*JSON*
|
||||
[source,bash]
|
||||
----
|
||||
$ curl --globoff 'http://localhost:8983/solr/gettingstarted/select?omitHeader=true&q=id:P11!prod&fl=*,[child%20parentFilter=%22type_s:PRODUCT%22]'
|
||||
{
|
||||
"response":{"numFound":1,"start":0,"maxScore":0.7002023,"numFoundExact":true,"docs":[
|
||||
{
|
||||
"id":"P11!prod",
|
||||
"name_s":"Swingline Stapler",
|
||||
"type_s":"PRODUCT",
|
||||
"description_t":"The Cadillac of office staplers ...",
|
||||
"_version_":1673055562829398016,
|
||||
"_childDocuments_":[
|
||||
{
|
||||
"id":"P11!D41",
|
||||
"type_s":"MANUAL",
|
||||
"name_s":"Red Swingline Brochure",
|
||||
"pages_i":1,
|
||||
"content_t":"...",
|
||||
"_version_":1673055562829398016},
|
||||
{
|
||||
"id":"P11!S21",
|
||||
"type_s":"SKU",
|
||||
"color_s":"RED",
|
||||
"price_i":42,
|
||||
"_version_":1673055562829398016},
|
||||
{
|
||||
"id":"P11!S31",
|
||||
"type_s":"SKU",
|
||||
"color_s":"BLACK",
|
||||
"price_i":3,
|
||||
"_version_":1673055562829398016},
|
||||
{
|
||||
"id":"P11!D51",
|
||||
"type_s":"MANUAL",
|
||||
"name_s":"Quick Reference Guide",
|
||||
"pages_i":1,
|
||||
"content_t":"How to use your stapler ...",
|
||||
"_version_":1673055562829398016},
|
||||
{
|
||||
"id":"P11!D61",
|
||||
"type_s":"MANUAL",
|
||||
"name_s":"Warranty Details",
|
||||
"pages_i":42,
|
||||
"content_t":"... lifetime guarantee ...",
|
||||
"_version_":1673055562829398016}]}]
|
||||
}}
|
||||
----
|
||||
====
|
||||
|
||||
[example.tab-pane#anon_xml_out]
|
||||
====
|
||||
[.tab-label]*XML*
|
||||
[source,bash]
|
||||
----
|
||||
$ curl --globoff 'http://localhost:8983/solr/gettingstarted/select?omitHeader=true&q=id:P11!prod&fl=*,[child%20parentFilter=%22type_s:PRODUCT%22]&wt=xml'
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<response>
|
||||
|
||||
<result name="response" numFound="1" start="0" maxScore="0.7002023" numFoundExact="true">
|
||||
<doc>
|
||||
<str name="id">P11!prod</str>
|
||||
<str name="name_s">Swingline Stapler</str>
|
||||
<str name="type_s">PRODUCT</str>
|
||||
<str name="description_t">The Cadillac of office staplers ...</str>
|
||||
<long name="_version_">1673055562829398016</long>
|
||||
<doc>
|
||||
<str name="id">P11!D41</str>
|
||||
<str name="type_s">MANUAL</str>
|
||||
<str name="name_s">Red Swingline Brochure</str>
|
||||
<int name="pages_i">1</int>
|
||||
<str name="content_t">...</str>
|
||||
<long name="_version_">1673055562829398016</long></doc>
|
||||
<doc>
|
||||
<str name="id">P11!S21</str>
|
||||
<str name="type_s">SKU</str>
|
||||
<str name="color_s">RED</str>
|
||||
<int name="price_i">42</int>
|
||||
<long name="_version_">1673055562829398016</long></doc>
|
||||
<doc>
|
||||
<str name="id">P11!S31</str>
|
||||
<str name="type_s">SKU</str>
|
||||
<str name="color_s">BLACK</str>
|
||||
<int name="price_i">3</int>
|
||||
<long name="_version_">1673055562829398016</long></doc>
|
||||
<doc>
|
||||
<str name="id">P11!D51</str>
|
||||
<str name="type_s">MANUAL</str>
|
||||
<str name="name_s">Quick Reference Guide</str>
|
||||
<int name="pages_i">1</int>
|
||||
<str name="content_t">How to use your stapler ...</str>
|
||||
<long name="_version_">1673055562829398016</long></doc>
|
||||
<doc>
|
||||
<str name="id">P11!D61</str>
|
||||
<str name="type_s">MANUAL</str>
|
||||
<str name="name_s">Warranty Details</str>
|
||||
<int name="pages_i">42</int>
|
||||
<str name="content_t">... lifetime guarantee ...</str>
|
||||
<long name="_version_">1673055562829398016</long></doc></doc>
|
||||
</result>
|
||||
</response>
|
||||
----
|
||||
====
|
||||
|
||||
|
||||
--
|
||||
|
|
|
@ -26,26 +26,28 @@ Many of these parsers are expressed the same way as <<local-parameters-in-querie
|
|||
|
||||
There are two query parsers that support block joins. These parsers allow indexing and searching for relational content that has been <<indexing-nested-documents.adoc#indexing-nested-documents, indexed as Nested Documents>>.
|
||||
|
||||
The example usage of the query parsers below assumes these two documents and each of their child documents have been indexed:
|
||||
The example usage of the query parsers below assumes the following documents have been indexed:
|
||||
|
||||
[source,xml]
|
||||
----
|
||||
<add>
|
||||
<doc>
|
||||
<field name="id">1</field>
|
||||
<field name="content_type">parent</field>
|
||||
<field name="title">Solr has block join support</field>
|
||||
<field name="content_type">parentDocument</field>
|
||||
<doc>
|
||||
<field name="id">2</field>
|
||||
<field name="content_type">child</field>
|
||||
<field name="comments">SolrCloud supports it too!</field>
|
||||
</doc>
|
||||
</doc>
|
||||
<doc>
|
||||
<field name="id">3</field>
|
||||
<field name="content_type">parent</field>
|
||||
<field name="title">New Lucene and Solr release</field>
|
||||
<field name="content_type">parentDocument</field>
|
||||
<doc>
|
||||
<field name="id">4</field>
|
||||
<field name="content_type">child</field>
|
||||
<field name="comments">Lots of new features</field>
|
||||
</doc>
|
||||
</doc>
|
||||
|
@ -54,21 +56,23 @@ The example usage of the query parsers below assumes these two documents and eac
|
|||
|
||||
=== Block Join Children Query Parser
|
||||
|
||||
This parser takes a query that matches some parent documents and returns their children.
|
||||
This parser wraps a query that matches some parent documents and returns the children of those documents.
|
||||
|
||||
The syntax for this parser is: `q={!child of=<allParents>}<someParents>`.
|
||||
The syntax for this parser is: `q={!child of=<blockMask>}<someParents>`.
|
||||
|
||||
The parameter `allParents` is a filter that matches *only parent documents*; here you would define the field and value that you used to identify *all parent documents*.
|
||||
* The inner subordinate query string (`someParents`) must be a query that will match some parent documents
|
||||
* The `of` parameter must be a query string to use as a <<#block-mask,Block Mask>> -- typically a query that matches the set of all possible parent documents
|
||||
|
||||
The parameter `someParents` identifies a query that will match some of the parent documents. The output is the children.
|
||||
The resulting query will match all documents which do _not_ match the `<blockMask>` query and are children (or descendents) of the documents matched by `<someParents>`.
|
||||
|
||||
Using the example documents above, we can construct a query such as `q={!child of="content_type:parentDocument"}title:lucene&wt=xml`. We only get one document in response:
|
||||
Using the example documents above, we can construct a query such as `q={!child of="content_type:parent"}title:lucene`. We only get one document in response:
|
||||
|
||||
[source,xml]
|
||||
----
|
||||
<result name="response" numFound="1" start="0">
|
||||
<doc>
|
||||
<str name="id">4</str>
|
||||
<arr name="content_type"><str>child</str></arr>
|
||||
<str name="comments">Lots of new features</str>
|
||||
</doc>
|
||||
</result>
|
||||
|
@ -76,12 +80,12 @@ Using the example documents above, we can construct a query such as `q={!child o
|
|||
|
||||
[CAUTION]
|
||||
====
|
||||
The query for `someParents` should match only parent documents passed by `allParents` or you may get an exception:
|
||||
The query for `someParents` *MUST* match a strict subset of the documents matched by the <<#block-mask,Block Mask>> or your query may result in an Error:
|
||||
|
||||
[literal]
|
||||
Parent query must not match any docs besides parent filter. Combine them as must (+) and must-not (-) clauses to find a problem doc.
|
||||
|
||||
You can search for `q=+(someParents) -(allParents)` to find a cause if you encounter this error.
|
||||
You can search for `q=+(someParents) -(blockMask)` to find a cause if you encounter this type of error.
|
||||
====
|
||||
|
||||
==== Filtering and Tagging
|
||||
|
@ -89,78 +93,72 @@ You can search for `q=+(someParents) -(allParents)` to find a cause if you encou
|
|||
`{!child}` also supports `filters` and `excludeTags` local parameters like the following:
|
||||
|
||||
[source,text]
|
||||
{!child of=<allParents> filters=$parentfq excludeTags=certain}<someParents>&parentfq=BRAND:Foo&parentfq=NAME:Bar&parentfq={!tag=certain}CATEGORY:Baz
|
||||
?q={!child of=<blockMask> filters=$parentfq excludeTags=certain}<someParents>
|
||||
&parentfq=BRAND:Foo
|
||||
&parentfq=NAME:Bar
|
||||
&parentfq={!tag=certain}CATEGORY:Baz
|
||||
|
||||
This is equivalent to:
|
||||
|
||||
[source,text]
|
||||
{!child of=<allParents>}+<someParents> +BRAND:Foo +NAME:Bar
|
||||
q={!child of=<blockMask>}+<someParents> +BRAND:Foo +NAME:Bar
|
||||
|
||||
Notice the "$" syntax in `filters` for referencing queries. Comma-separated tags in `excludeTags` allow excluding certain queries by tagging. Overall the idea is similar to <<faceting.adoc#tagging-and-excluding-filters, excluding fq in facets>>. Note that filtering is applied to the subordinate clause (`<someParents>`), and the intersection result is joined to the children.
|
||||
|
||||
==== All Children Syntax
|
||||
|
||||
When subordinate clause (`<someParents>`) is omitted, it's parsed as a _segmented_ and _cached_ filter for children documents. More precisely, `q={!child of=<allParents>}` is equivalent to `q=\*:* -<allParents>`.
|
||||
When subordinate clause (`<someParents>`) is omitted, it's parsed as a _segmented_ and _cached_ filter for children documents. More precisely, `q={!child of=<blockMask>}` is equivalent to `q=\*:* -<blockMask>`.
|
||||
|
||||
=== Block Join Parent Query Parser
|
||||
|
||||
This parser takes a query that matches child documents and returns their parents.
|
||||
|
||||
The syntax for this parser is similar: `q={!parent which=<allParents>}<someChildren>`.
|
||||
The syntax for this parser is similar to the `child` parser: `q={!parent which=<blockMask>}<someChildren>`.
|
||||
|
||||
The parameter `allParents` is a filter that matches *only parent documents*; here you would define the field and value that you used to identify *all parent documents*.
|
||||
* The inner subordinate query string (`someChildren`) must be a query that will match some child documents
|
||||
* The `which` parameter must be a query string to use as a <<#block-mask,Block Mask>> -- typically a query that matches the set of all possible parent documents
|
||||
|
||||
The parameter `someChildren` is a query that matches some or all of the child documents.
|
||||
The resulting query will match all documents which _do_ match the `<blockMask>` query and are parents (or ancestors) of the documents matched by `<someChildren>`.
|
||||
|
||||
[CAUTION]
|
||||
====
|
||||
The query for `someChildren` should match only child documents or you may get an exception:
|
||||
|
||||
[literal]
|
||||
Child query must not match same docs with parent filter. Combine them as must clauses (+) to find a problem doc.
|
||||
|
||||
You can search for `q=+(parentFilter) +(someChildren)` to find a cause.
|
||||
====
|
||||
|
||||
Again using the example documents above, we can construct a query such as `q={!parent which="content_type:parentDocument"}comments:SolrCloud&wt=xml`. We get this document in response:
|
||||
Again using the example documents above, we can construct a query such as `q={!parent which="content_type:parent"}comments:SolrCloud`. We get this document in response:
|
||||
|
||||
[source,xml]
|
||||
----
|
||||
<result name="response" numFound="1" start="0">
|
||||
<doc>
|
||||
<str name="id">1</str>
|
||||
<arr name="content_type"><str>parent</str></arr>
|
||||
<arr name="title"><str>Solr has block join support</str></arr>
|
||||
<arr name="content_type"><str>parentDocument</str></arr>
|
||||
</doc>
|
||||
</result>
|
||||
----
|
||||
|
||||
.Using which
|
||||
[WARNING]
|
||||
|
||||
[CAUTION]
|
||||
====
|
||||
A common mistake is to try to filter parents with a `which` filter, as in this bad example:
|
||||
The query for `someChildren` *MUST NOT* match any documents matched by the <<#block-mask,Block Mask>> or your query may result in an Error:
|
||||
|
||||
`q={!parent which="*title:join*"}comments:SolrCloud`
|
||||
[literal]
|
||||
Child query must not match same docs with parent filter. Combine them as must clauses (+) to find a problem doc.
|
||||
|
||||
Instead, you should use a sibling mandatory clause as a filter:
|
||||
|
||||
`q= *+title:join* +{!parent which="*content_type:parentDocument*"}comments:SolrCloud`
|
||||
You can search for `q=+(blockMask) +(someChildren)` to find a cause.
|
||||
====
|
||||
|
||||
|
||||
==== Filtering and Tagging
|
||||
|
||||
The `{!parent}` query supports `filters` and `excludeTags` local parameters like the following:
|
||||
|
||||
[source,text]
|
||||
{!parent which=<allParents> filters=$childfq excludeTags=certain}<someChildren>&
|
||||
childfq=COLOR:Red&
|
||||
childfq=SIZE:XL&
|
||||
childfq={!tag=certain}PRINT:Hatched
|
||||
?q={!parent which=<blockMask> filters=$childfq excludeTags=certain}<someChildren>
|
||||
&childfq=COLOR:Red
|
||||
&childfq=SIZE:XL
|
||||
&childfq={!tag=certain}PRINT:Hatched
|
||||
|
||||
This is equivalent to:
|
||||
|
||||
[source,text]
|
||||
{!parent which=<allParents>}+<someChildren> +COLOR:Red +SIZE:XL
|
||||
q={!parent which=<blockMask>}+<someChildren> +COLOR:Red +SIZE:XL
|
||||
|
||||
Notice the "$" syntax in `filters` for referencing queries. Comma-separated tags in `excludeTags` allow excluding certain queries by tagging. Overall the idea is similar to <<faceting.adoc#tagging-and-excluding-filters, excluding fq in facets>>. Note that filtering is applied to the subordinate clause (`<someChildren>`) first, and the intersection result is joined to the parents.
|
||||
|
||||
|
@ -170,7 +168,41 @@ You can optionally use the `score` local parameter to return scores of the subor
|
|||
|
||||
==== All Parents Syntax
|
||||
|
||||
When subordinate clause (`<someChildren>`) is omitted, it's parsed as a _segmented_ and _cached_ filter for all parent documents, or more precisely `q={!parent which=<allParents>}` is equivalent to `q=<allParents>`.
|
||||
When subordinate clause (`<someChildren>`) is omitted, it's parsed as a _segmented_ and _cached_ filter for all parent documents, or more precisely `q={!parent which=<blockMask>}` is equivalent to `q=<blockMask>`.
|
||||
|
||||
[#block-mask]
|
||||
=== Block Masks: The `of` and `which` local params
|
||||
|
||||
The purpose of the "Block Mask" query specified as either an `of` or `which` param (depending on the parser used) is to identify the set of all documents in the index which should be treated as "parents" _(or their ancestors)_ and which documents should be treated as "children". This is important because in the "on disk" index, the relationships are flattened into "blocks" of documents, so the `of` / `which` params are needed to serve as a "mask" against the flat document blocks to identify the boundaries of every hierarchical relationship.
|
||||
|
||||
In the example queries above, we were able to use a very simple Block Mask of `doc_type:parent` because our data is very simple: every document is either a `parent` or a `child`. So this query string easily distinguishes _all_ of our documents.
|
||||
|
||||
A common mistake is to try and use a `which` parameter that is more restrictive than the set of all parent documents, in order to filter the parents that are matched, as in this bad example:
|
||||
|
||||
----
|
||||
// BAD! DO NOT USE!
|
||||
q={!parent which="title:join"}comments:support
|
||||
----
|
||||
|
||||
This type of query will frequently not work the way you might expect. Since the `which` param only identifies _some_ of the "parent" documents, the resulting query can match "parent" documents it should not, because it will mistakenly identify all documents which do _not_ match the `which="title:join"` Block Mask as children of the next "parent" document in the index (that does match this Mask).
|
||||
|
||||
A similar problematic situation can arise when mixing parent/child documents with "simple" documents that have no children _and do not match the query used to identify 'parent' documents_. For example, if we add the following document to our existing parent/child example documents...
|
||||
|
||||
[source,xml]
|
||||
----
|
||||
<add>
|
||||
<doc>
|
||||
<field name="id">0</field>
|
||||
<field name="content_type">plain</field>
|
||||
<field name="title">Lucene and Solr are cool</field>
|
||||
</doc>
|
||||
</add>
|
||||
----
|
||||
|
||||
...then our simple `doc_type:parent` Block Mask would no longer be adequate. We would instead need to use `\*:* -doc_type:child` or `doc_type:(simple parent)` to prevent our "simple" document from mistakenly being treated as a "child" of an adjacent "parent" document.
|
||||
|
||||
The <<searching-nested-documents#searching-nested-documents,Searching Nested Documents>> section contains more detailed examples of specifying Block Mask queries with non-trivial hierarchies of documents.
|
||||
|
||||
|
||||
== Boolean Query Parser
|
||||
|
||||
|
|
|
@ -29,174 +29,269 @@ This section does not show case faceting on nested documents. For nested documen
|
|||
|
||||
== Query Examples
|
||||
|
||||
For the upcoming examples, assume the following documents have been indexed:
|
||||
For the upcoming examples, we'll assume an index containing the same documents covered in <<indexing-nested-documents#example-indexing-syntax,Indexing Nested Documents>>:
|
||||
|
||||
[source,json]
|
||||
----
|
||||
[
|
||||
{
|
||||
"ID": "1",
|
||||
"title": "Cooking Recommendations",
|
||||
"tags": ["cooking", "meetup"],
|
||||
"posts": [{
|
||||
"ID": "2",
|
||||
"title": "Cookies",
|
||||
"comments": [{
|
||||
"ID": "3",
|
||||
"content": "Lovely recipe"
|
||||
},
|
||||
{
|
||||
"ID": "4",
|
||||
"content": "A-"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"ID": "5",
|
||||
"title": "Cakes"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"ID": "6",
|
||||
"title": "For Hire",
|
||||
"tags": ["professional", "jobs"],
|
||||
"posts": [{
|
||||
"ID": "7",
|
||||
"title": "Search Engineer",
|
||||
"comments": [{
|
||||
"ID": "8",
|
||||
"content": "I am interested"
|
||||
},
|
||||
{
|
||||
"ID": "9",
|
||||
"content": "How large is the team?"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"ID": "10",
|
||||
"title": "Low level Engineer"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
----
|
||||
include::indexing-nested-documents.adoc[tag=sample-indexing-deeply-nested-documents]
|
||||
|
||||
=== Child Doc Transformer
|
||||
|
||||
Can be used enrich query results with the documents' descendants. +
|
||||
For a detailed explanation of this transformer, see the section <<transforming-result-documents.adoc#child-childdoctransformerfactory, [child] - ChildDocTransformerFactory>>.
|
||||
By default, documents that match a query do not include any of their nested children in the response. The `[child]` Doc Transformer can be used to enrich query results with the documents' descendants.
|
||||
|
||||
For example, let us examine this query:
|
||||
`q=ID:1,
|
||||
fl=ID,[child childFilter=/comments/content:recipe]`. +
|
||||
The Child Doc Transformer can be used to enrich matching docs with comments that match a particular filter. +
|
||||
In this particular query, the child Filter will only match the first comment of doc(ID:1),
|
||||
therefore only that particular comment will be appended to the result. This is a special syntax feature.
|
||||
For a detailed explanation of this transformer, and specifics on its syntax & limitations, please refer to the section <<transforming-result-documents.adoc#child-childdoctransformerfactory, [child] - ChildDocTransformerFactory>>.
|
||||
|
||||
[source,json]
|
||||
A simple query matching all documents with a description that includes "staplers":
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
{ "response":{"numFound":1,"start":0,"docs":[
|
||||
$ curl 'http://localhost:8983/solr/gettingstarted/select?omitHeader=true&q=description_t:staplers'
|
||||
{
|
||||
"response":{"numFound":1,"start":0,"maxScore":0.30136836,"numFoundExact":true,"docs":[
|
||||
{
|
||||
"ID": "1",
|
||||
"title": "Cooking Recommendations",
|
||||
"tags": ["cooking", "meetup"],
|
||||
"posts": [{
|
||||
"ID": "2",
|
||||
"title": "Cookies",
|
||||
"comments": [{
|
||||
"ID": "3",
|
||||
"content": "Lovely recipe"
|
||||
}]
|
||||
}]
|
||||
}]
|
||||
}
|
||||
}
|
||||
"id":"P11!prod",
|
||||
"name_s":"Swingline Stapler",
|
||||
"description_t":"The Cadillac of office staplers ...",
|
||||
"_version_":1672933224035123200}]
|
||||
}}
|
||||
----
|
||||
|
||||
=== Children Query Parser
|
||||
The same query with the addition of the `[child]` transformer is shown below. Note that the `numFound` has not changed; we are still matching the same set of documents, but when returning those documents the nested children are also returned as pseudo-fields.
|
||||
|
||||
Can be used to retrieve children of a matching document. +
|
||||
For a detailed explanation of this parser, see the section <<other-parsers.adoc#block-join-children-query-parser, Block Join Children Query Parser>>.
|
||||
|
||||
For example, let us examine this query:
|
||||
`q={!child of='_nest_path_:/posts}content:"Search Engineer"`. +
|
||||
The `'of'` filter returns all posts. This is used to filter out all documents in a particular path of the hierarchy(all parents).
|
||||
The second part of the query is a filter for some parents, which we wish to return their children. +
|
||||
In this example, all comments of posts which had "Search Engineer in their `content` field will be returned.
|
||||
|
||||
[source,json]
|
||||
[source,bash]
|
||||
----
|
||||
{ "response":{"numFound":2,"start":0,"docs":[
|
||||
$ curl 'http://localhost:8983/solr/gettingstarted/select?omitHeader=true&q=description_t:staplers&fl=*,[child]'
|
||||
{
|
||||
"response":{"numFound":1,"start":0,"maxScore":0.30136836,"numFoundExact":true,"docs":[
|
||||
{
|
||||
"ID": "8",
|
||||
"content": "I am interested"
|
||||
},
|
||||
"id":"P11!prod",
|
||||
"name_s":"Swingline Stapler",
|
||||
"description_t":"The Cadillac of office staplers ...",
|
||||
"_version_":1672933224035123200,
|
||||
"skus":[
|
||||
{
|
||||
"ID": "9",
|
||||
"content": "How large is the team?"
|
||||
}
|
||||
]}
|
||||
}
|
||||
"id":"P11!S21",
|
||||
"color_s":"RED",
|
||||
"price_i":42,
|
||||
"_version_":1672933224035123200,
|
||||
"manuals":[
|
||||
{
|
||||
"id":"P11!D41",
|
||||
"name_s":"Red Swingline Brochure",
|
||||
"pages_i":1,
|
||||
"content_t":"...",
|
||||
"_version_":1672933224035123200}]},
|
||||
|
||||
{
|
||||
"id":"P11!S31",
|
||||
"color_s":"BLACK",
|
||||
"price_i":3,
|
||||
"_version_":1672933224035123200}],
|
||||
"manuals":[
|
||||
{
|
||||
"id":"P11!D51",
|
||||
"name_s":"Quick Reference Guide",
|
||||
"pages_i":1,
|
||||
"content_t":"How to use your stapler ...",
|
||||
"_version_":1672933224035123200},
|
||||
|
||||
{
|
||||
"id":"P11!D61",
|
||||
"name_s":"Warranty Details",
|
||||
"pages_i":42,
|
||||
"content_t":"... lifetime guarantee ...",
|
||||
"_version_":1672933224035123200}]}]
|
||||
}}
|
||||
----
|
||||
|
||||
=== Parents Query Parser
|
||||
|
||||
Can be used to retrieve parents of a child document. +
|
||||
For a detailed explanation of this parser, see the section <<other-parsers.adoc#block-join-parent-query-parser,Block Join Parent Query Parser>>.
|
||||
=== Child Query Parser
|
||||
|
||||
For example, let us examine this query:
|
||||
`q={!parent which='-_nest_path_:* \*:*'}title:"Search Engineer"`. +
|
||||
The `'which'` filter returns all root documents.
|
||||
The second part of this query is a filter to match some child documents.
|
||||
This query returns the parent at the root(since all parents filter returns root documents) of each
|
||||
matching child document. In this case, all child documents which had `Search Engineer` in their `title` field.
|
||||
The `{!child}` query parser can be used to search for the _descendent_ documents of parent documents matching a wrapped query. For a detailed explanation of this parser, see the section <<other-parsers.adoc#block-join-children-query-parser, Block Join Children Query Parser>>.
|
||||
|
||||
[source,json]
|
||||
Let's consider again the `description_t:staplers` query used above -- if we wrap that query in a `{!child}` query parser then instead of "matching" & returning the product level documents, we instead match all of the _descendent_ child documents of the original query:
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
{ "response":{"numFound":1,"start":0,"docs":[{
|
||||
"ID": "6",
|
||||
"title": "For Hire",
|
||||
"tags": ["professional", "jobs"]
|
||||
}
|
||||
]}
|
||||
}
|
||||
$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' -d 'q={!child of="*:* -_nest_path_:*"}description_t:staplers'
|
||||
{
|
||||
"response":{"numFound":5,"start":0,"maxScore":0.30136836,"numFoundExact":true,"docs":[
|
||||
{
|
||||
"id":"P11!D41",
|
||||
"name_s":"Red Swingline Brochure",
|
||||
"pages_i":1,
|
||||
"content_t":"...",
|
||||
"_version_":1672933224035123200},
|
||||
{
|
||||
"id":"P11!S21",
|
||||
"color_s":"RED",
|
||||
"price_i":42,
|
||||
"_version_":1672933224035123200},
|
||||
{
|
||||
"id":"P11!S31",
|
||||
"color_s":"BLACK",
|
||||
"price_i":3,
|
||||
"_version_":1672933224035123200},
|
||||
{
|
||||
"id":"P11!D51",
|
||||
"name_s":"Quick Reference Guide",
|
||||
"pages_i":1,
|
||||
"content_t":"How to use your stapler ...",
|
||||
"_version_":1672933224035123200},
|
||||
{
|
||||
"id":"P11!D61",
|
||||
"name_s":"Warranty Details",
|
||||
"pages_i":42,
|
||||
"content_t":"... lifetime guarantee ...",
|
||||
"_version_":1672933224035123200}]
|
||||
}}
|
||||
----
|
||||
|
||||
In this example we've used `\*:* -\_nest_path_:*` as our <<other-parsers#block-mask,`of` parameter>> to indicate we want to consider all documents which don't have a nest path -- i.e., all "root" level documents -- as the set of possible parents.
|
||||
|
||||
By changing the `of` param to match ancestors at specific `\_nest_path_` levels, we can narrow down the list of children we return. In the query below, we search for all descendents of `skus` (using an `of` param that identifies all documents that do _not_ have a `\_nest_path_` with the prefix `/skus/*`) with a `price_i` less than `50`:
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!child of="*:* -_nest_path_:\\/skus\\/*"}(+price_i:[* TO 50] +_nest_path_:\/skus)'
|
||||
{
|
||||
"response":{"numFound":1,"start":0,"maxScore":1.0,"numFoundExact":true,"docs":[
|
||||
{
|
||||
"id":"P11!D41",
|
||||
"name_s":"Red Swingline Brochure",
|
||||
"pages_i":1,
|
||||
"content_t":"...",
|
||||
"_version_":1675662666752851968}]
|
||||
}}
|
||||
----
|
||||
|
||||
[#double-escaping-nest-path-slashes]
|
||||
[CAUTION]
|
||||
.Double Escaping `\_nest_path_` slashes in `of`
|
||||
====
|
||||
Note that in the above example, the `/` characters in the `\_nest_path_` were "double escaped" in the `of` parameter:
|
||||
|
||||
* One level of `\` escaping is necessary to prevent the `/` from being interpreted as a {lucene-javadocs}/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Regexp_Searches[Regex Query]
|
||||
* An additional level of "escaping the escape character" is necessary because the `of` local parameter is a quoted string; so we need a second `\` to ensure the first `\` is preserved and passed as is to the query parser.
|
||||
|
||||
(You can see that only a single level of `\` escaping is needed in the body of the query string -- to prevent the Regex syntax -- because it's not a quoted string local param.)
|
||||
|
||||
You may find it more convenient to use <<local-parameters-in-queries#parameter-dereferencing,parameter references>> in conjunction with <<other-parsers#other-parsers,other parsers>> that do not treat `/` as a special character to express the same query in a more verbose form:
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!child of=$block_mask}(+price_i:[* TO 50] +{!field f="_nest_path_" v="/skus"})' --data-urlencode 'block_mask=(*:* -{!prefix f="_nest_path_" v="/skus/"})'
|
||||
----
|
||||
|
||||
====
|
||||
|
||||
|
||||
=== Parent Query Parser
|
||||
|
||||
The inverse of the `{!child}` query parser is the `{!parent}` query parser, which lets you search for the _ancestor_ documents of some child documents matching a wrapped query. For a detailed explanation of this parser, see the section <<other-parsers.adoc#block-join-parent-query-parser,Block Join Parent Query Parser>>.
|
||||
|
||||
Let's first consider this example of searching for all "manual" type documents that have exactly `1` page:
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
$ curl 'http://localhost:8983/solr/gettingstarted/select?omitHeader=true&q=pages_i:1'
|
||||
{
|
||||
"response":{"numFound":3,"start":0,"maxScore":1.0,"numFoundExact":true,"docs":[
|
||||
{
|
||||
"id":"P11!D41",
|
||||
"name_s":"Red Swingline Brochure",
|
||||
"pages_i":1,
|
||||
"content_t":"...",
|
||||
"_version_":1676585794196733952},
|
||||
{
|
||||
"id":"P11!D51",
|
||||
"name_s":"Quick Reference Guide",
|
||||
"pages_i":1,
|
||||
"content_t":"How to use your stapler ...",
|
||||
"_version_":1676585794196733952},
|
||||
{
|
||||
"id":"P22!D42",
|
||||
"name_s":"Red Mont Blanc Brochure",
|
||||
"pages_i":1,
|
||||
"content_t":"...",
|
||||
"_version_":1676585794347728896}]
|
||||
}}
|
||||
----
|
||||
|
||||
We can wrap that query in a `{!parent}` query to return the details of all products that are ancestors of these manuals:
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!parent which="*:* -_nest_path_:*"}(+_nest_path_:\/skus\/manuals +pages_i:1)'
|
||||
{
|
||||
"response":{"numFound":2,"start":0,"maxScore":1.4E-45,"numFoundExact":true,"docs":[
|
||||
{
|
||||
"id":"P11!prod",
|
||||
"name_s":"Swingline Stapler",
|
||||
"description_t":"The Cadillac of office staplers ...",
|
||||
"_version_":1676585794196733952},
|
||||
{
|
||||
"id":"P22!prod",
|
||||
"name_s":"Mont Blanc Fountain Pen",
|
||||
"description_t":"A Premium Writing Instrument ...",
|
||||
"_version_":1676585794347728896}]
|
||||
}}
|
||||
----
|
||||
|
||||
In this example we've used `\*:* -\_nest_path_:*` as our <<other-parsers#block-mask,`which` parameter>> to indicate we want to consider all documents which don't have a nest path -- i.e., all "root" level documents -- as the set of possible parents.
|
||||
|
||||
By changing the `which` param to match ancestors at specific `\_nest_path_` levels, we can change the type of ancestors we return. In the query below, we search for `skus` (using a `which` param that identifies all documents that do _not_ have a `\_nest_path_` with the prefix `/skus/*`) that are the ancestors of `manuals` with exactly `1` page:
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!parent which="*:* -_nest_path_:\\/skus\\/*"}(+_nest_path_:\/skus\/manuals +pages_i:1)'
|
||||
{
|
||||
"response":{"numFound":2,"start":0,"maxScore":1.4E-45,"numFoundExact":true,"docs":[
|
||||
{
|
||||
"id":"P11!S21",
|
||||
"color_s":"RED",
|
||||
"price_i":42,
|
||||
"_version_":1676585794196733952},
|
||||
{
|
||||
"id":"P22!S22",
|
||||
"color_s":"RED",
|
||||
"price_i":89,
|
||||
"_version_":1676585794347728896}]
|
||||
}}
|
||||
----
|
||||
|
||||
[CAUTION]
|
||||
====
|
||||
Note that in the above example, the `/` characters in the `\_nest_path_` were "double escaped" in the `which` parameter, for the <<#double-escaping-nest-path-slashes,same reasons discussed above>> regarding the `{!child}` parser's `of` parameter.
|
||||
====
|
||||
|
||||
|
||||
=== Combining Block Join Query Parsers with Child Doc Transformer
|
||||
|
||||
The combination of these two features enable seamless creation of powerful queries. +
|
||||
For example, querying posts which are under a page tagged as a job, contain the words "Search Engineer".
|
||||
The comments for matching posts can also be fetched, all done in a single Solr Query.
|
||||
The combination of these two parsers with the `[child]` transformer enables seamless creation of very powerful queries.
|
||||
|
||||
For example, let us examine this query:
|
||||
`q=+{!child of='-\_nest_path_:* \*:*'}+tags:"jobs" &fl=*,[child]
|
||||
&fq=\_nest_path_:/posts`. +
|
||||
This query returns all posts and their comments, which had "Search Engineer" in their title,
|
||||
and are indexed under a page tagged with "jobs".
|
||||
The comments are appended to the matching posts, since the ChildDocTransformer is specified under the `fl` parameter.
|
||||
Here for example is a query where:
|
||||
|
||||
[source,json]
|
||||
* the (sku) documents returned must have a color of "RED"
|
||||
* the (sku) documents returned must be the descendents of root level (product) documents which have:
|
||||
** immediate child "manuals" documents which have:
|
||||
*** "lifetime guarantee" in their content
|
||||
* each returned (sku) document also includes any descendent (manuals) documents it has
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
{ "response":{"numFound":1,"start":0,"docs":[
|
||||
$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' -d 'fq=color_s:RED' --data-urlencode 'q={!child of="*:* -_nest_path_:*" filters=$parent_fq}' --data-urlencode 'parent_fq={!parent which="*:* -_nest_path_:*"}(+_nest_path_:"/manuals" +content_t:"lifetime guarantee")' -d 'fl=*,[child]'
|
||||
{
|
||||
"response":{"numFound":1,"start":0,"maxScore":1.4E-45,"numFoundExact":true,"docs":[
|
||||
{
|
||||
"ID": "7",
|
||||
"title": "Search Engineer",
|
||||
"comments": [{
|
||||
"ID": "8",
|
||||
"content": "I am interested"
|
||||
},
|
||||
"id":"P11!S21",
|
||||
"color_s":"RED",
|
||||
"price_i":42,
|
||||
"_version_":1676585794196733952,
|
||||
"manuals":[
|
||||
{
|
||||
"ID": "9",
|
||||
"content": "How large is the team?"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"ID": "10",
|
||||
"title": "Low level Engineer"
|
||||
}]
|
||||
}
|
||||
}
|
||||
"id":"P11!D41",
|
||||
"name_s":"Red Swingline Brochure",
|
||||
"pages_i":1,
|
||||
"content_t":"...",
|
||||
"_version_":1676585794196733952}]}]
|
||||
}}
|
||||
----
|
||||
|
|
|
@ -124,20 +124,24 @@ A default style can be configured by specifying an `args` parameter in your `sol
|
|||
|
||||
=== [child] - ChildDocTransformerFactory
|
||||
|
||||
This transformer returns all <<indexing-nested-documents.adoc#indexing-nested-documents,descendant documents>> of each parent document matching your query.
|
||||
This is useful when you have indexed nested child documents and want to retrieve the child documents for the relevant parent documents for any type of search query.
|
||||
|
||||
This transformer returns all <<indexing-nested-documents.adoc#indexing-nested-documents,descendant documents>> of each parent document matching your query. This is useful when you have indexed nested child documents and want to retrieve the child documents for the relevant parent documents for any type of search query.
|
||||
|
||||
Note that this transformer can be used even when the query used to match the result documents is not a <<other-parsers.adoc#block-join-query-parsers,Block Join query>>.
|
||||
|
||||
|
||||
[source,plain]
|
||||
----
|
||||
fl=id,[child parentFilter=doc_type:book childFilter=doc_type:chapter limit=100]
|
||||
q=book_title:Solr&fl=id,[child parentFilter=doc_type:book childFilter=doc_type:chapter limit=100]
|
||||
----
|
||||
|
||||
Note that this transformer can be used even though the query itself is not a <<other-parsers.adoc#block-join-query-parsers,Block Join query>>.
|
||||
If the documents involved include a `\_nest_path_` field, then it is used to re-create the hierarchical structure of the descendent documents using the original pseudo-field names the documents were indexed with, otherwise the descendent documents are returned as a flat list of <<indexing-nested-documents#indexing-anonymous-children,anonymous children>>.
|
||||
|
||||
When using this transformer, the `parentFilter` parameter must be specified _unless_ the schema declares `\_nest_path_`. It works the same as in all Block Join Queries. Additional optional parameters are:
|
||||
`parentFilter`::
|
||||
When using a schema that does _not_ include the `\_nest_path_` field, this parameter is mandatory, and serves the same purpose as the `of`/`which` params in `{!child}`/`{!parent}` query parsers: to identify the set of "all parents" for the purpose of identifying the beginning & end of each nested document block. *When a schema _does_ include a `\_nest_path_` field, this parameter is prohibited.*
|
||||
|
||||
`childFilter`::
|
||||
A query to filter which child documents should be included. This can be particularly useful when you have multiple levels of hierarchical documents. The default is all children. This query supports a special syntax to match nested doc patterns so long as `\_nest_path_` is defined in the schema and the query contains a `/` preceding the first `:`. Example: `childFilter=/comments/content:recipe` Further details of this are experimental.
|
||||
A query to filter which child documents should be included. This can be particularly useful when you have multiple levels of hierarchical documents. The default is all children.
|
||||
|
||||
`limit`::
|
||||
The maximum number of child documents to be returned per parent document. The default is `10`.
|
||||
|
@ -148,6 +152,33 @@ The field list which the transformer is to return. The default is the top level
|
|||
There is a further limitation in which the fields here should be a subset of those specified by the top level `fl` parameter.
|
||||
|
||||
|
||||
[TIP]
|
||||
====
|
||||
.Experimental `childFilter` Syntax
|
||||
|
||||
When a `\_nest_path_` field is defined, the `childFilter` option supports an experimental syntax to combine a "path syntax" restriction with a more traditional filtering query.
|
||||
|
||||
*This syntax is triggered by including a `/` separated path structure prior to a query that includes a `:` character.*
|
||||
|
||||
When the "path" begins with a `/` character, it restricts matches to documents that have that exact "path" of nested pseudo-field documents, starting at the Root document of the block (even if the document being transformed is not a Root level document).
|
||||
|
||||
Some Examples:
|
||||
|
||||
* `childFilter="/skus/\*:*"`
|
||||
** Matches any documents that are descendents of the current document and have a "nested path" of `/skus` -- but not any children of those `skus`
|
||||
* `childFilter="/skus/color_s:RED"`
|
||||
** Matches any documents that are descendents of the current document; match `color_s:RED`; and have a "nested path" of `/skus` -- but not any children of those `skus`
|
||||
* `childFilter="/skus/manuals/\*:*"`
|
||||
** Matches any documents that are descendents of the current document and have a "nested path" of `/skus/manuals` -- but not any children of those `manuals`
|
||||
|
||||
When paths do not start with a `/` they are treated as "path suffixes":
|
||||
|
||||
* `childFilter="manuals/\*:*"`
|
||||
** Matches any documents that are descendents of the current document and have a "nested path" that ends with "manuals", regardless of how deeply nested they are -- but not any children of those `manuals`
|
||||
|
||||
====
|
||||
|
||||
|
||||
=== [shard] - ShardAugmenterFactory
|
||||
|
||||
This transformer adds information about what shard each individual document came from in a distributed request.
|
||||
|
|
|
@ -105,105 +105,97 @@ The resulting document in our collection will be:
|
|||
|
||||
=== Updating Child Documents
|
||||
|
||||
Solr supports modifying, adding and removing child documents as part of atomic updates. +
|
||||
Schema and configuration requirements are detailed in
|
||||
<<updating-parts-of-documents#field-storage, Field Storage>> and <<indexing-nested-documents#schema-configuration, Indexing Nested Documents>>. +
|
||||
Under the hood, Solr retrieves the whole nested structure, deletes the old documents,
|
||||
and reindexes the structure after applying the atomic update. +
|
||||
Syntactically, nested/partial updates are very similar to a regular atomic update,
|
||||
as demonstrated by the examples below.
|
||||
Solr supports modifying, adding and removing child documents as part of atomic updates. Syntactically, updates changing the children of a document are very similar to regular atomic updates of simple fields, as demonstrated by the examples below.
|
||||
|
||||
[NOTE]
|
||||
Schema and configuration requirements for updating child documents use the same <<updating-parts-of-documents#field-storage,Field Storage>> requirements for atomic updates mentioned above, combined with the <<indexing-nested-documents#schema-configuration,schema configuration rules for Indexing Nested Documents>> -- notably:
|
||||
* The `\_root_` field must be configured with `stored="true"` or `docValues="true"`
|
||||
* The `\_nest_path_` field must exist (it is implicitly `docValues="true"`)
|
||||
|
||||
Under the hood, when Solr processes atomic updates on nested documents, it retrieves the entire block structure (up to and including the common "Root" document), reindexes the structure after applying the atomic update, and deletes the old documents.
|
||||
|
||||
[IMPORTANT]
|
||||
====
|
||||
.\_route_ Parameter
|
||||
To ensure each nested update is routed to its respective shard,
|
||||
`\_route_` parameter must be set to the root document's ID when the
|
||||
update does not have that root document.
|
||||
.Routing Updates using child document Ids in SolrCloud
|
||||
|
||||
When SolrCloud receives document updates, the <<shards-and-indexing-data-in-solrcloud#document-routing,document routing>> rules for the collection are used to determine which shard should process the update based on the `id` of the document.
|
||||
|
||||
When sending an update that specifies the `id` of a _child document_ this will not work by default: the correct shard to send the document to is based on the `id` of the "Root" document for the block the child document is in, *not* the `id` of the child document being updated.
|
||||
|
||||
Solr offers two solutions to address this:
|
||||
|
||||
* Clients may specify a <<shards-and-indexing-data-in-solrcloud#document-routing,`\_route_` parameter>>, with the `id` of the Root document as the parameter value, on each update to tell Solr which shard should process the update.
|
||||
* Clients can use the (default) `compositeId` router's "prefix routing" feature when indexing all documents to ensure that all child/descendent documents in a Block use the same `id` prefix as the Root level document. This will cause Solr's default routing logic to automatically send child document updates to the correct shard.
|
||||
|
||||
All of the examples below use `id` prefixes, so no `\_route_` param will be necessary for these examples.
|
||||
====
|
||||
|
||||
If the following document exists in our collection:
|
||||
For the upcoming examples, we'll assume an index containing the same documents covered in <<indexing-nested-documents#example-indexing-syntax,Indexing Nested Documents>>:
|
||||
|
||||
[source,json]
|
||||
include::indexing-nested-documents.adoc[tag=sample-indexing-deeply-nested-documents]
|
||||
|
||||
==== Modifying Child Document Fields
|
||||
|
||||
All of the <<#atomic-updates,Atomic Update operations>> mentioned above are supported for "real" fields of Child Documents:
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
curl -X POST 'http://localhost:8983/solr/gettingstarted/update?commit=true' -H 'Content-Type: application/json' --data-binary '[
|
||||
{
|
||||
"id":"mydoc",
|
||||
"product":"T-Shirt",
|
||||
"stock": {
|
||||
"id":"mydoc2",
|
||||
"color":"red",
|
||||
"size": ["L"]
|
||||
}
|
||||
}
|
||||
"id": "P11!S31",
|
||||
"price_i": { "inc": 73 },
|
||||
"color_s": { "set": "GREY" }
|
||||
} ]'
|
||||
----
|
||||
|
||||
And we apply the following update command:
|
||||
==== Replacing all child documents
|
||||
|
||||
[source,json]
|
||||
As with normal (multiValued) fields, the `set` keyword can be used to replace all child documents in a pseudo-field:
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
curl -X POST 'http://localhost:8983/solr/gettingstarted/update?commit=true' -H 'Content-Type: application/json' --data-binary '[
|
||||
{
|
||||
"id":"mydoc",
|
||||
"stock": {
|
||||
"add":
|
||||
{
|
||||
"id":"mydoc3",
|
||||
"color":"blue",
|
||||
"size": ["M"]
|
||||
}
|
||||
}
|
||||
}
|
||||
----
|
||||
|
||||
The resulting document in our collection will be:
|
||||
|
||||
[source,json]
|
||||
----
|
||||
{
|
||||
"id":"mydoc",
|
||||
"product":"T-Shirt",
|
||||
"stock": [{
|
||||
"id":"mydoc2",
|
||||
"color":"red",
|
||||
"size": ["L"]
|
||||
"id": "P22!S22",
|
||||
"manuals": { "set": [ { "id": "P22!D77",
|
||||
"name_s": "Why Red Pens Are the Best",
|
||||
"content_t": "... correcting papers ...",
|
||||
},
|
||||
{
|
||||
"id":"mydoc3",
|
||||
"color":"blue",
|
||||
"size": ["M"]
|
||||
}]
|
||||
}
|
||||
{ "id": "P22!D88",
|
||||
"name_s": "How to get Red ink stains out of fabric",
|
||||
"content_t": "... vinegar ...",
|
||||
} ] }
|
||||
|
||||
} ]'
|
||||
----
|
||||
|
||||
Documents inside nested structures can also be updated.
|
||||
These type of updates require setting the `\_route_` set to the root document's ID
|
||||
==== Adding a child document
|
||||
|
||||
If we send this update, setting `\_route_`=mydoc
|
||||
As with normal (multiValued) fields, the `add` keyword can be used to add additional child documents to a pseudo-field:
|
||||
|
||||
[source,json]
|
||||
[source,bash]
|
||||
----
|
||||
curl -X POST 'http://localhost:8983/solr/gettingstarted/update?commit=true' -H 'Content-Type: application/json' --data-binary '[
|
||||
{
|
||||
"id":"mydoc2",
|
||||
"size": {"add": ["S"]}
|
||||
}
|
||||
"id": "P11!S21",
|
||||
"manuals": { "add": { "id": "P11!D99",
|
||||
"name_s": "Why Red Staplers Are the Best",
|
||||
"content_t": "Once upon a time, Mike Judge ...",
|
||||
} }
|
||||
} ]'
|
||||
----
|
||||
|
||||
The resulting document in our collection will be:
|
||||
|
||||
[source,json]
|
||||
==== Removing a child document
|
||||
|
||||
As with normal (multiValued) fields, the `remove` keyword can be used to remove a child document (by `id`) from its pseudo-field:
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
curl -X POST 'http://localhost:8983/solr/gettingstarted/update?commit=true' -H 'Content-Type: application/json' --data-binary '[
|
||||
{
|
||||
"id":"mydoc",
|
||||
"product":"T-Shirt",
|
||||
"stock": [{
|
||||
"id":"mydoc2",
|
||||
"color":"red",
|
||||
"size": ["L", "S"]
|
||||
},
|
||||
{
|
||||
"id":"mydoc3",
|
||||
"color":"blue",
|
||||
"size": ["M"]
|
||||
}]
|
||||
}
|
||||
"id": "P11!S21",
|
||||
"manuals": { "remove": { "id": "P11!D41" } }
|
||||
} ]'
|
||||
----
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,280 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.client.ref_guide_examples;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||
|
||||
import org.apache.solr.cloud.SolrCloudTestCase;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.util.ExternalPaths;
|
||||
|
||||
import org.junit.After;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
|
||||
* Example SolrJ usage for indexing nested documents
|
||||
*
|
||||
* Snippets surrounded by "tag" and "end" comments are extracted and used in the Solr Reference Guide.
|
||||
*/
|
||||
public class IndexingNestedDocuments extends SolrCloudTestCase {
|
||||
/** Name under which {@link #setupCluster} registers the configset used by {@link #testIndexingAnonKids}. */
public static final String ANON_KIDS_CONFIG = "anon_kids_configset";
|
||||
@BeforeClass
|
||||
public static void setupCluster() throws Exception {
|
||||
configureCluster(1)
|
||||
// when indexing 'anonymous' kids, we need a schema that doesn't use _nest_path_ so
|
||||
// that we can use [child] transformer with a parentFilter...
|
||||
.addConfig(ANON_KIDS_CONFIG, new File(ExternalPaths.TECHPRODUCTS_CONFIGSET).toPath())
|
||||
.configure();
|
||||
}
|
||||
|
||||
@After
|
||||
public void cleanCollections() throws Exception {
|
||||
cluster.deleteAllCollections();
|
||||
}
|
||||
|
||||
/**
|
||||
* Syntactic sugar so code snippet doesn't refer to test-framework specific method name
|
||||
*/
|
||||
public static SolrClient getSolrClient() {
|
||||
return cluster.getSolrClient();
|
||||
}
|
||||
|
||||
/**
|
||||
* Demo of using anonymous children when indexing hierarchical documents.
|
||||
* This test code is used as an 'include' from the ref-guide
|
||||
*/
|
||||
public void testIndexingAnonKids() throws Exception {
|
||||
final String collection = "test_anon";
|
||||
CollectionAdminRequest.createCollection(collection, ANON_KIDS_CONFIG, 1, 1).process(cluster.getSolrClient());
|
||||
cluster.getSolrClient().setDefaultCollection(collection);
|
||||
|
||||
//
|
||||
// DO NOT MODIFY THESE EXAMPLE DOCS WITH OUT MAKING THE SAME CHANGES TO THE JSON AND XML
|
||||
// EQUIVILENT EXAMPLES IN 'indexing-nested-documents.adoc'
|
||||
//
|
||||
|
||||
// tag::anon-kids[]
|
||||
final SolrClient client = getSolrClient();
|
||||
|
||||
final SolrInputDocument p1 = new SolrInputDocument();
|
||||
p1.setField("id", "P11!prod");
|
||||
p1.setField("type_s", "PRODUCT");
|
||||
p1.setField("name_s", "Swingline Stapler");
|
||||
p1.setField("description_t", "The Cadillac of office staplers ...");
|
||||
{
|
||||
final SolrInputDocument s1 = new SolrInputDocument();
|
||||
s1.setField("id", "P11!S21");
|
||||
s1.setField("type_s", "SKU");
|
||||
s1.setField("color_s", "RED");
|
||||
s1.setField("price_i", 42);
|
||||
{
|
||||
final SolrInputDocument m1 = new SolrInputDocument();
|
||||
m1.setField("id", "P11!D41");
|
||||
m1.setField("type_s", "MANUAL");
|
||||
m1.setField("name_s", "Red Swingline Brochure");
|
||||
m1.setField("pages_i", 1);
|
||||
m1.setField("content_t", "...");
|
||||
|
||||
s1.addChildDocument(m1);
|
||||
}
|
||||
|
||||
final SolrInputDocument s2 = new SolrInputDocument();
|
||||
s2.setField("id", "P11!S31");
|
||||
s2.setField("type_s", "SKU");
|
||||
s2.setField("color_s", "BLACK");
|
||||
s2.setField("price_i", 3);
|
||||
|
||||
final SolrInputDocument m1 = new SolrInputDocument();
|
||||
m1.setField("id", "P11!D51");
|
||||
m1.setField("type_s", "MANUAL");
|
||||
m1.setField("name_s", "Quick Reference Guide");
|
||||
m1.setField("pages_i", 1);
|
||||
m1.setField("content_t", "How to use your stapler ...");
|
||||
|
||||
final SolrInputDocument m2 = new SolrInputDocument();
|
||||
m2.setField("id", "P11!D61");
|
||||
m2.setField("type_s", "MANUAL");
|
||||
m2.setField("name_s", "Warranty Details");
|
||||
m2.setField("pages_i", 42);
|
||||
m2.setField("content_t", "... lifetime guarantee ...");
|
||||
|
||||
p1.addChildDocuments(Arrays.asList(s1, s2, m1, m2));
|
||||
}
|
||||
|
||||
client.add(p1);
|
||||
// end::anon-kids[]
|
||||
|
||||
client.commit();
|
||||
|
||||
final SolrDocumentList docs = getSolrClient().query
|
||||
(new SolrQuery("description_t:Cadillac").set("fl", "*,[child parentFilter='type_s:PRODUCT']")).getResults();
|
||||
|
||||
assertEquals(1, docs.getNumFound());
|
||||
assertEquals("P11!prod", docs.get(0).getFieldValue("id"));
|
||||
|
||||
// [child] returns a flat list of all (anon) descendents
|
||||
assertEquals(5, docs.get(0).getChildDocumentCount());
|
||||
assertEquals(5, docs.get(0).getChildDocuments().size());
|
||||
|
||||
// flat list is depth first...
|
||||
final SolrDocument red_stapler_brochure = docs.get(0).getChildDocuments().get(0);
|
||||
assertEquals("P11!D41", red_stapler_brochure.getFieldValue("id"));
|
||||
|
||||
final SolrDocument red_stapler = docs.get(0).getChildDocuments().get(1);
|
||||
assertEquals("P11!S21", red_stapler.getFieldValue("id"));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Demo of using <code>NestPath</code> related psuedo-fields when indexing hierarchical documents.
|
||||
* This test code is used as an 'include' from the ref-guide
|
||||
*/
|
||||
public void testIndexingUsingNestPath() throws Exception {
|
||||
final String collection = "test_anon";
|
||||
CollectionAdminRequest.createCollection(collection, 1, 1).process(cluster.getSolrClient());
|
||||
cluster.getSolrClient().setDefaultCollection(collection);
|
||||
|
||||
//
|
||||
// DO NOT MODIFY THESE EXAMPLE DOCS WITH OUT MAKING THE SAME CHANGES TO THE JSON AND XML
|
||||
// EQUIVILENT EXAMPLES IN 'indexing-nested-documents.adoc'
|
||||
//
|
||||
|
||||
// tag::nest-path[]
|
||||
final SolrClient client = getSolrClient();
|
||||
|
||||
final SolrInputDocument p1 = new SolrInputDocument();
|
||||
p1.setField("id", "P11!prod");
|
||||
p1.setField("name_s", "Swingline Stapler");
|
||||
p1.setField("description_t", "The Cadillac of office staplers ...");
|
||||
{
|
||||
final SolrInputDocument s1 = new SolrInputDocument();
|
||||
s1.setField("id", "P11!S21");
|
||||
s1.setField("color_s", "RED");
|
||||
s1.setField("price_i", 42);
|
||||
{
|
||||
final SolrInputDocument m1 = new SolrInputDocument();
|
||||
m1.setField("id", "P11!D41");
|
||||
m1.setField("name_s", "Red Swingline Brochure");
|
||||
m1.setField("pages_i", 1);
|
||||
m1.setField("content_t", "...");
|
||||
|
||||
s1.setField("manuals", m1);
|
||||
}
|
||||
|
||||
final SolrInputDocument s2 = new SolrInputDocument();
|
||||
s2.setField("id", "P11!S31");
|
||||
s2.setField("color_s", "BLACK");
|
||||
s2.setField("price_i", 3);
|
||||
|
||||
p1.setField("skus", Arrays.asList(s1, s2));
|
||||
}
|
||||
{
|
||||
final SolrInputDocument m1 = new SolrInputDocument();
|
||||
m1.setField("id", "P11!D51");
|
||||
m1.setField("name_s", "Quick Reference Guide");
|
||||
m1.setField("pages_i", 1);
|
||||
m1.setField("content_t", "How to use your stapler ...");
|
||||
|
||||
final SolrInputDocument m2 = new SolrInputDocument();
|
||||
m2.setField("id", "P11!D61");
|
||||
m2.setField("name_s", "Warranty Details");
|
||||
m2.setField("pages_i", 42);
|
||||
m2.setField("content_t", "... lifetime guarantee ...");
|
||||
|
||||
p1.setField("manuals", Arrays.asList(m1, m2));
|
||||
}
|
||||
|
||||
final SolrInputDocument p2 = new SolrInputDocument();
|
||||
p2.setField("id", "P22!prod");
|
||||
p2.setField("name_s", "Mont Blanc Fountain Pen");
|
||||
p2.setField("description_t", "A Premium Writing Instrument ...");
|
||||
{
|
||||
final SolrInputDocument s1 = new SolrInputDocument();
|
||||
s1.setField("id", "P22!S22");
|
||||
s1.setField("color_s", "RED");
|
||||
s1.setField("price_i", 89);
|
||||
{
|
||||
final SolrInputDocument m1 = new SolrInputDocument();
|
||||
m1.setField("id", "P22!D42");
|
||||
m1.setField("name_s", "Red Mont Blanc Brochure");
|
||||
m1.setField("pages_i", 1);
|
||||
m1.setField("content_t", "...");
|
||||
|
||||
s1.setField("manuals", m1);
|
||||
}
|
||||
|
||||
final SolrInputDocument s2 = new SolrInputDocument();
|
||||
s2.setField("id", "P22!S32");
|
||||
s2.setField("color_s", "BLACK");
|
||||
s2.setField("price_i", 67);
|
||||
|
||||
p2.setField("skus", Arrays.asList(s1, s2));
|
||||
}
|
||||
{
|
||||
final SolrInputDocument m1 = new SolrInputDocument();
|
||||
m1.setField("id", "P22!D52");
|
||||
m1.setField("name_s", "How To Use A Pen");
|
||||
m1.setField("pages_i", 42);
|
||||
m1.setField("content_t", "Start by removing the cap ...");
|
||||
|
||||
p2.setField("manuals", m1);
|
||||
}
|
||||
|
||||
client.add(Arrays.asList(p1, p2));
|
||||
// end::nest-path[]
|
||||
|
||||
client.commit();
|
||||
|
||||
|
||||
// Now a quick sanity check that the nest path is working properly...
|
||||
|
||||
final SolrDocumentList docs = getSolrClient().query
|
||||
(new SolrQuery("description_t:Writing").set("fl", "*,[child]")).getResults();
|
||||
|
||||
assertEquals(1, docs.getNumFound());
|
||||
assertEquals("P22!prod", docs.get(0).getFieldValue("id"));
|
||||
|
||||
assertEquals(1, docs.get(0).getFieldValues("manuals").size());
|
||||
assertEquals(SolrDocument.class, docs.get(0).getFieldValues("manuals").iterator().next().getClass());
|
||||
|
||||
assertEquals(2, docs.get(0).getFieldValues("skus").size());
|
||||
final List<Object> skus = new ArrayList<>(docs.get(0).getFieldValues("skus"));
|
||||
|
||||
assertEquals(SolrDocument.class, skus.get(0).getClass());
|
||||
assertEquals(SolrDocument.class, skus.get(1).getClass());
|
||||
|
||||
final SolrDocument red_pen = (SolrDocument) skus.get(0);
|
||||
assertEquals("P22!S22", red_pen.getFieldValue("id"));
|
||||
|
||||
assertEquals(1, red_pen.getFieldValues("manuals").size());
|
||||
assertEquals(SolrDocument.class, red_pen.getFieldValues("manuals").iterator().next().getClass());
|
||||
|
||||
final SolrDocument red_pen_brochure = (SolrDocument) red_pen.getFieldValues("manuals").iterator().next();
|
||||
assertEquals("P22!D42", red_pen_brochure.getFieldValue("id"));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue