SOLR-15069: [child parentFilter=...] is now optional (#2181)

This commit is contained in:
David Smiley 2021-01-06 17:43:15 -05:00 committed by GitHub
parent 7b9f875145
commit 3147625890
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 105 additions and 35 deletions

View File

@ -236,6 +236,9 @@ Improvements
* SOLR-15062: /api/cluster/zk/ls should give the stat of the current node (noble) * SOLR-15062: /api/cluster/zk/ls should give the stat of the current node (noble)
* SOLR-15069: [child]: the parentFilter parameter is now fully optional and perhaps obsolete.
(David Smiley)
Optimizations Optimizations
--------------------- ---------------------
* SOLR-14975: Optimize CoreContainer.getAllCoreNames, getLoadedCoreNames and getCoreDescriptors. (Bruno Roustant) * SOLR-14975: Optimize CoreContainer.getAllCoreNames, getLoadedCoreNames and getCoreDescriptors. (Bruno Roustant)

View File

@ -28,14 +28,22 @@ import java.util.Map;
import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap; import com.google.common.collect.Multimap;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.join.BitSetProducer; import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BitSet; import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.BitsFilteredPostingsEnum;
import org.apache.solr.search.DocSet; import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields; import org.apache.solr.search.SolrReturnFields;
@ -52,20 +60,23 @@ class ChildDocTransformer extends DocTransformer {
private static final String ANON_CHILD_KEY = "_childDocuments_"; private static final String ANON_CHILD_KEY = "_childDocuments_";
private final String name; private final String name;
private final BitSetProducer parentsFilter; private final BitSetProducer parentsFilter; // if null; resolve parent via uniqueKey instead
private final DocSet childDocSet; private final DocSet childDocSet;
private final int limit; private final int limit;
private final boolean isNestedSchema; private final boolean isNestedSchema;
private final SolrReturnFields childReturnFields; private final SolrReturnFields childReturnFields;
private String[] extraRequestedFields;
ChildDocTransformer(String name, BitSetProducer parentsFilter, DocSet childDocSet, ChildDocTransformer(String name, BitSetProducer parentsFilter, DocSet childDocSet,
SolrReturnFields returnFields, boolean isNestedSchema, int limit) { SolrReturnFields returnFields, boolean isNestedSchema, int limit,
String uniqueKeyField) {
this.name = name; this.name = name;
this.parentsFilter = parentsFilter; this.parentsFilter = parentsFilter;
this.childDocSet = childDocSet; this.childDocSet = childDocSet;
this.limit = limit; this.limit = limit;
this.isNestedSchema = isNestedSchema; this.isNestedSchema = isNestedSchema;
this.childReturnFields = returnFields!=null? returnFields: new SolrReturnFields(); this.childReturnFields = returnFields!=null? returnFields: new SolrReturnFields();
this.extraRequestedFields = parentsFilter == null ? new String[]{uniqueKeyField} : null;
} }
@Override @Override
@ -76,6 +87,37 @@ class ChildDocTransformer extends DocTransformer {
@Override @Override
public boolean needsSolrIndexSearcher() { return true; } public boolean needsSolrIndexSearcher() { return true; }
/**
 * Returns the extra field names this transformer asks for in addition to the user's field list.
 *
 * @return a one-element array holding the uniqueKey field name when this transformer was built
 *     without a parentsFilter; {@code null} when a parentsFilter was supplied
 */
@Override
public String[] getExtraRequestFields() {
  return this.extraRequestedFields;
}
/**
 * Locates the root document preceding {@code segRootId} within one segment by consulting the
 * bit set that {@code parentsFilter} produces for that segment.
 *
 * @param leafReaderContext the segment containing the root document
 * @param segRootId segment-local doc id of the root document being transformed
 * @return the result of {@code prevSetBit(segRootId - 1)} on the segment's parent bit set, or
 *     -1 when {@code segRootId} is 0
 * @throws SolrException with BAD_REQUEST when the filter yields no bit set for this segment
 * @throws IOException if obtaining the bit set fails
 */
private int getPrevRootGivenFilter(LeafReaderContext leafReaderContext, int segRootId) throws IOException {
  final BitSet parentBits = parentsFilter.getBitSet(leafReaderContext);
  if (parentBits == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        "Parent filter '" + parentsFilter + "' doesn't match any parent documents");
  }
  if (segRootId == 0) {
    return -1; // no document can precede the first one in the segment
  }
  return parentBits.prevSetBit(segRootId - 1);
}
/**
 * Locates the doc id just before the start of the root document's block within one segment,
 * by looking up the root's uniqueKey value in the {@code IndexSchema.ROOT_FIELD_NAME} terms
 * instead of using a parent filter.
 *
 * @param leafReaderContext the segment containing the root document
 * @param segRootId segment-local doc id of the root document being transformed
 * @param idBytes indexed form of the root document's uniqueKey value
 * @return one less than the first live doc id posted for {@code idBytes}; or
 *     {@code segRootId - 1} (meaning "no child docs") when the term is absent or has no live
 *     postings
 * @throws IOException if reading terms or postings fails
 */
private int getPrevRootGivenId(LeafReaderContext leafReaderContext, int segRootId,
    BytesRef idBytes) throws IOException {
  final LeafReader leafReader = leafReaderContext.reader();
  final Terms rootTerms = leafReader.terms(IndexSchema.ROOT_FIELD_NAME); // never returns null here
  final TermsEnum termsEnum = rootTerms.iterator();
  if (!termsEnum.seekExact(idBytes)) {
    return segRootId - 1; // thus no child docs
  }

  // Only live docs count; deleted docs are filtered out of the postings.
  final PostingsEnum rawPostings = termsEnum.postings(null, PostingsEnum.NONE);
  final PostingsEnum livePostings = BitsFilteredPostingsEnum.wrap(rawPostings, leafReader.getLiveDocs());
  final int firstDocId = livePostings.nextDoc();
  if (firstDocId == DocIdSetIterator.NO_MORE_DOCS) {
    return segRootId - 1; // thus no child docs
  }

  assert firstDocId <= segRootId : "integrity violation";
  return firstDocId - 1;
}
@Override @Override
public void transform(SolrDocument rootDoc, int rootDocId) { public void transform(SolrDocument rootDoc, int rootDocId) {
// note: this algorithm works if both if we have have _nest_path_ and also if we don't! // note: this algorithm works if both if we have have _nest_path_ and also if we don't!
@ -87,17 +129,24 @@ class ChildDocTransformer extends DocTransformer {
final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves(); final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
final int seg = ReaderUtil.subIndex(rootDocId, leaves); final int seg = ReaderUtil.subIndex(rootDocId, leaves);
final LeafReaderContext leafReaderContext = leaves.get(seg); final LeafReaderContext leafReaderContext = leaves.get(seg);
final Bits liveDocs = leafReaderContext.reader().getLiveDocs();
final int segBaseId = leafReaderContext.docBase; final int segBaseId = leafReaderContext.docBase;
final int segRootId = rootDocId - segBaseId; final int segRootId = rootDocId - segBaseId;
final BitSet segParentsBitSet = parentsFilter.getBitSet(leafReaderContext);
final Bits liveDocs = leafReaderContext.reader().getLiveDocs();
if (segParentsBitSet == null) { // can return be -1 and that's okay (happens for very first block)
final int segPrevRootId;
if (parentsFilter != null) {
segPrevRootId = getPrevRootGivenFilter(leafReaderContext, segRootId);
} else {
final IndexSchema schema = searcher.getSchema();
final String idStr = schema.printableUniqueKey(rootDoc);
if (idStr == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Parent filter '" + parentsFilter + "' doesn't match any parent documents"); "[child] requires fl to include the ID");
}
final BytesRef idBytes = schema.indexableUniqueKey(idStr);
// Pass the segment-local id, not the global rootDocId: getPrevRootGivenId works on doc ids
// of this leaf reader and falls back to (segRootId - 1) to signal "no children".
segPrevRootId = getPrevRootGivenId(leafReaderContext, segRootId, idBytes);
} }
final int segPrevRootId = segRootId==0? -1: segParentsBitSet.prevSetBit(segRootId - 1); // can return -1 and that's okay
if (segPrevRootId == (segRootId - 1)) { if (segPrevRootId == (segRootId - 1)) {
// doc has no children, return fast // doc has no children, return fast

View File

@ -41,7 +41,9 @@ import static org.apache.solr.schema.IndexSchema.NEST_PATH_FIELD_NAME;
/** /**
* Attaches all descendants (child documents) to each parent document. * Attaches all descendants (child documents) to each parent document.
* *
* The "parentFilter" parameter is mandatory if the schema is not of nest/hierarchy. * Optionally you can provide a "parentFilter" param to designate which documents are the root
* documents (parent-most documents). Solr can figure this out on its own but you might want to
* specify it.
* *
* Optionally you can provide a "childFilter" param to filter out which child documents should be returned and a * Optionally you can provide a "childFilter" param to filter out which child documents should be returned and a
* "limit" param which provides an option to specify the number of child documents * "limit" param which provides an option to specify the number of child documents
@ -96,10 +98,7 @@ public class ChildDocTransformerFactory extends TransformerFactory {
// DocSet parentDocSet = req.getSearcher().getDocSet(parentFilterQuery); // DocSet parentDocSet = req.getSearcher().getDocSet(parentFilterQuery);
// then return BitSetProducer with custom BitSet impl accessing the docSet // then return BitSetProducer with custom BitSet impl accessing the docSet
if (parentFilterStr == null) { if (parentFilterStr == null) {
if (!buildHierarchy) { parentsFilter = !buildHierarchy ? null : new QueryBitSetProducer(rootFilter);
throw new SolrException(ErrorCode.BAD_REQUEST, "Parent filter should be sent as parentFilter=filterCondition");
}
parentsFilter = new QueryBitSetProducer(rootFilter);
} else { } else {
if(buildHierarchy) { if(buildHierarchy) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Parent filter should not be sent when the schema is nested"); throw new SolrException(ErrorCode.BAD_REQUEST, "Parent filter should not be sent when the schema is nested");
@ -137,7 +136,8 @@ public class ChildDocTransformerFactory extends TransformerFactory {
int limit = params.getInt( "limit", 10 ); int limit = params.getInt( "limit", 10 );
return new ChildDocTransformer(field, parentsFilter, childDocSet, childSolrReturnFields, buildHierarchy, limit); return new ChildDocTransformer(field, parentsFilter, childDocSet, childSolrReturnFields,
buildHierarchy, limit, req.getSchema().getUniqueKeyField().getName());
} }
private static Query parseQuery(String qstr, SolrQueryRequest req, String param) { private static Query parseQuery(String qstr, SolrQueryRequest req, String param) {

View File

@ -47,6 +47,7 @@ import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.payloads.PayloadDecoder; import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.apache.solr.common.ConfigNode; import org.apache.solr.common.ConfigNode;
import org.apache.solr.common.MapSerializable; import org.apache.solr.common.MapSerializable;
@ -359,6 +360,11 @@ public class IndexSchema {
} }
} }
/**
 * Converts a readable/printable uniqueKey value into its indexable form.
 *
 * @param idStr the printable uniqueKey value
 * @return a {@link BytesRef} built from the uniqueKey field type's internal representation of
 *     {@code idStr}
 */
public BytesRef indexableUniqueKey(String idStr) {
  final String internalId = uniqueKeyFieldType.toInternal(idStr);
  return new BytesRef(internalId);
}
private SchemaField getIndexedField(String fname) { private SchemaField getIndexedField(String fname) {
SchemaField f = getFields().get(fname); SchemaField f = getFields().get(fname);
if (f==null) { if (f==null) {

View File

@ -96,13 +96,14 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 {
"/response/result/doc[1]/doc[2]/str[@name='id']='5'"}; "/response/result/doc[1]/doc[2]/str[@name='id']='5'"};
assertQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", assertQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
"fl", "*,[child parentFilter=\"subject:parentDocument\"]"), test1); "fl", "*,[child]"), test1);
// shows parentFilter specified (not necessary any more) and also childFilter
assertQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
    "fl", "id, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:foo\"]"), test2);
assertQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", assertQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
"fl", "id, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:foo\"]"), test2); "fl", "id, subject,[child childFilter=\"title:bar\" limit=2]"), test3);
assertQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
"fl", "id, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:bar\" limit=2]"), test3);
SolrException e = expectThrows(SolrException.class, () -> { SolrException e = expectThrows(SolrException.class, () -> {
h.query(req("q", "*:*", "fq", "subject:\"parentDocument\" ", h.query(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
@ -240,13 +241,13 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 {
}; };
assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
"fl", "*,[child parentFilter=\"subject:parentDocument\"]"), test1); "fl", "*,[child]"), test1);
assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
"fl", "id, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:foo\"]"), test2); "fl", "id, subject,[child childFilter=\"title:foo\"]"), test2);
assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
"fl", "id, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:bar\" limit=3]"), test3); "fl", "id, subject,[child childFilter=\"title:bar\" limit=3]"), test3);
} }
private void testChildDocNonStoredDVFields() throws Exception { private void testChildDocNonStoredDVFields() throws Exception {
@ -271,26 +272,26 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 {
}; };
assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
"fl", "*,[child parentFilter=\"subject:parentDocument\"]"), test1); "fl", "*,[child]"), test1);
assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
"fl", "intDvoDefault, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:foo\"]"), test2); "fl", "intDvoDefault, subject,[child childFilter=\"title:foo\"]"), test2);
assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
"fl", "intDvoDefault, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:bar\" limit=2]"), test3); "fl", "intDvoDefault, subject,[child childFilter=\"title:bar\" limit=2]"), test3);
} }
private void testChildReturnFields() throws Exception { private void testChildReturnFields() throws Exception {
assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
"fl", "*,[child parentFilter=\"subject:parentDocument\" fl=\"intDvoDefault,child_fl:[value v='child_fl_test']\"]"), "fl", "*,[child fl=\"intDvoDefault,child_fl:[value v='child_fl_test']\"]"),
"/response/docs/[0]/intDefault==42", "/response/docs/[0]/intDefault==42",
"/response/docs/[0]/_childDocuments_/[0]/intDvoDefault==42", "/response/docs/[0]/_childDocuments_/[0]/intDvoDefault==42",
"/response/docs/[0]/_childDocuments_/[0]/child_fl=='child_fl_test'"); "/response/docs/[0]/_childDocuments_/[0]/child_fl=='child_fl_test'");
try(SolrQueryRequest req = req("q", "*:*", "fq", "subject:\"parentDocument\" ", try(SolrQueryRequest req = req("q", "*:*", "fq", "subject:\"parentDocument\" ",
"fl", "intDefault,[child parentFilter=\"subject:parentDocument\" fl=\"intDvoDefault, [docid]\"]")) { "fl", "intDefault,[child fl=\"intDvoDefault, [docid]\"]")) {
BasicResultContext res = (BasicResultContext) h.queryAndResponse("/select", req).getResponse(); BasicResultContext res = (BasicResultContext) h.queryAndResponse("/select", req).getResponse();
Iterator<SolrDocument> docsStreamer = res.getProcessedDocuments(); Iterator<SolrDocument> docsStreamer = res.getProcessedDocuments();
while (docsStreamer.hasNext()) { while (docsStreamer.hasNext()) {
@ -401,6 +402,14 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 {
"fl", "id, cat, title, [child childFilter='cat:childDocument' parentFilter=\"subject:parentDocument\"]"), "fl", "id, cat, title, [child childFilter='cat:childDocument' parentFilter=\"subject:parentDocument\"]"),
tests); tests);
// shows if parentFilter matches all docs, then there are effectively no child docs
assertJQ(req("q", "*:*",
"sort", "id asc",
"fq", "subject:\"parentDocument\" ",
"fl", "id,[child childFilter='cat:childDocument' parentFilter=\"*:*\"]"),
"/response==" +
"{'numFound':2,'start':0,'numFoundExact':true,'docs':[{'id':'1'},{'id':'4'}]}");
} }
private void testSubQueryParentFilterJSON() throws Exception { private void testSubQueryParentFilterJSON() throws Exception {
@ -451,13 +460,13 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 {
assertQ(req("q", "*:*", assertQ(req("q", "*:*",
"sort", "id asc", "sort", "id asc",
"fq", "subject:\"parentDocument\" ", "fq", "subject:\"parentDocument\" ",
"fl", "*,[child childFilter='cat:childDocument' parentFilter=\"subject:parentDocument\"]"), "fl", "*,[child childFilter='cat:childDocument']"),
tests); tests);
assertQ(req("q", "*:*", assertQ(req("q", "*:*",
"sort", "id asc", "sort", "id asc",
"fq", "subject:\"parentDocument\" ", "fq", "subject:\"parentDocument\" ",
"fl", "id, cat, title, [child childFilter='cat:childDocument' parentFilter=\"subject:parentDocument\"]"), "fl", "id, cat, title, [child childFilter='cat:childDocument']"),
tests); tests);
} }

View File

@ -132,14 +132,11 @@ Note that this transformer can be used even when the query used to match the res
[source,plain] [source,plain]
---- ----
q=book_title:Solr&fl=id,[child parentFilter=doc_type:book childFilter=doc_type:chapter limit=100] q=book_title:Solr&fl=id,[child childFilter=doc_type:chapter limit=100]
---- ----
If the documents involved include a `\_nest_path_` field, then it is used to re-create the hierarchical structure of the descendent documents using the original psuedo-field names the documents were indexed with, otherwise the descendent documents are returned as a flat list of <<indexing-nested-documents#indexing-anonymous-children,anonymous children>>. If the documents involved include a `\_nest_path_` field, then it is used to re-create the hierarchical structure of the descendent documents using the original psuedo-field names the documents were indexed with, otherwise the descendent documents are returned as a flat list of <<indexing-nested-documents#indexing-anonymous-children,anonymous children>>.
`parentFilter`::
When using a schema that does _not_ include the `\_nest_path_` field, this parameter is mandatory, and serves the same purpose as the `of`/`which` parms in `{!child}`/`{!parent}` query parsers: to identify the set of "all parents" for the purpose of identifying the begining & end of each nested document block. *When a schema _does_ include a `\_nest_path_` field, this parameter is prohibited.*
`childFilter`:: `childFilter`::
A query to filter which child documents should be included. This can be particularly useful when you have multiple levels of hierarchical documents. The default is all children. A query to filter which child documents should be included. This can be particularly useful when you have multiple levels of hierarchical documents. The default is all children.
@ -151,6 +148,12 @@ The field list which the transformer is to return. The default is the top level
+ +
There is a further limitation in which the fields here should be a subset of those specified by the top level `fl` parameter. There is a further limitation in which the fields here should be a subset of those specified by the top level `fl` parameter.
`parentFilter`::
Serves the same purpose as the `of`/`which` params in `{!child}`/`{!parent}` query parsers: to
identify the set of "all parents" for the purpose of identifying the beginning & end of each
nested document block. This recently became fully optional and appears to be obsolete.
It is likely to be removed in a future Solr release, so _if you find it has some use, let the
project know!_
[TIP] [TIP]
==== ====