From 31476258906716b0ae95b65ace8a5c2f7b113871 Mon Sep 17 00:00:00 2001 From: David Smiley Date: Wed, 6 Jan 2021 17:43:15 -0500 Subject: [PATCH] SOLR-15069: [child parentFilter=...] is now optional (#2181) --- solr/CHANGES.txt | 3 + .../transform/ChildDocTransformer.java | 69 ++++++++++++++++--- .../transform/ChildDocTransformerFactory.java | 12 ++-- .../org/apache/solr/schema/IndexSchema.java | 6 ++ .../transform/TestChildDocTransformer.java | 39 +++++++---- .../src/transforming-result-documents.adoc | 11 +-- 6 files changed, 105 insertions(+), 35 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index eb2e73d492d..adc0820d539 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -236,6 +236,9 @@ Improvements * SOLR-15062: /api/cluster/zk/ls should give the stat of the current node (noble) +* SOLR-15069: [child]: the parentFilter parameter is now fully optional and perhaps obsolete. + (David Smiley) + Optimizations --------------------- * SOLR-14975: Optimize CoreContainer.getAllCoreNames, getLoadedCoreNames and getCoreDescriptors. (Bruno Roustant) diff --git a/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformer.java b/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformer.java index de00b8278ca..ac89419228d 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformer.java +++ b/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformer.java @@ -28,14 +28,22 @@ import java.util.Map; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Multimap; import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.join.BitSetProducer; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.BitSet; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrException; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.search.BitsFilteredPostingsEnum; import org.apache.solr.search.DocSet; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrReturnFields; @@ -52,20 +60,23 @@ class ChildDocTransformer extends DocTransformer { private static final String ANON_CHILD_KEY = "_childDocuments_"; private final String name; - private final BitSetProducer parentsFilter; + private final BitSetProducer parentsFilter; // if null; resolve parent via uniqueKey instead private final DocSet childDocSet; private final int limit; private final boolean isNestedSchema; private final SolrReturnFields childReturnFields; + private String[] extraRequestedFields; ChildDocTransformer(String name, BitSetProducer parentsFilter, DocSet childDocSet, - SolrReturnFields returnFields, boolean isNestedSchema, int limit) { + SolrReturnFields returnFields, boolean isNestedSchema, int limit, + String uniqueKeyField) { this.name = name; this.parentsFilter = parentsFilter; this.childDocSet = childDocSet; this.limit = limit; this.isNestedSchema = isNestedSchema; this.childReturnFields = returnFields!=null? returnFields: new SolrReturnFields(); + this.extraRequestedFields = parentsFilter == null ? new String[]{uniqueKeyField} : null; } @Override @@ -76,6 +87,37 @@ class ChildDocTransformer extends DocTransformer { @Override public boolean needsSolrIndexSearcher() { return true; } + @Override + public String[] getExtraRequestFields() { + return extraRequestedFields; + } + + private int getPrevRootGivenFilter(LeafReaderContext leafReaderContext, int segRootId) throws IOException { + final BitSet segParentsBitSet = parentsFilter.getBitSet(leafReaderContext); + if (segParentsBitSet != null) { + return segRootId == 0 ? -1 : segParentsBitSet.prevSetBit(segRootId - 1); + } + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "Parent filter '" + parentsFilter + "' doesn't match any parent documents"); + } + + private int getPrevRootGivenId(LeafReaderContext leafReaderContext, int segRootId, + BytesRef idBytes) throws IOException { + final LeafReader reader = leafReaderContext.reader(); + final Terms terms = reader.terms(IndexSchema.ROOT_FIELD_NAME); // never returns null here + final TermsEnum iterator = terms.iterator(); + if (iterator.seekExact(idBytes)) { + PostingsEnum docs = iterator.postings(null, PostingsEnum.NONE); + docs = BitsFilteredPostingsEnum.wrap(docs, reader.getLiveDocs()); + int id = docs.nextDoc(); + if (id != DocIdSetIterator.NO_MORE_DOCS) { + assert id <= segRootId : "integrity violation"; + return id - 1; + } + } + return segRootId - 1; // thus no child docs + } + @Override public void transform(SolrDocument rootDoc, int rootDocId) { // note: this algorithm works if both if we have have _nest_path_ and also if we don't! @@ -87,18 +129,25 @@ class ChildDocTransformer extends DocTransformer { final List leaves = searcher.getIndexReader().leaves(); final int seg = ReaderUtil.subIndex(rootDocId, leaves); final LeafReaderContext leafReaderContext = leaves.get(seg); + final Bits liveDocs = leafReaderContext.reader().getLiveDocs(); final int segBaseId = leafReaderContext.docBase; final int segRootId = rootDocId - segBaseId; - final BitSet segParentsBitSet = parentsFilter.getBitSet(leafReaderContext); - final Bits liveDocs = leafReaderContext.reader().getLiveDocs(); - if (segParentsBitSet == null) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, - "Parent filter '" + parentsFilter + "' doesn't match any parent documents"); + // can return be -1 and that's okay (happens for very first block) + final int segPrevRootId; + if (parentsFilter != null) { + segPrevRootId = getPrevRootGivenFilter(leafReaderContext, segRootId); + } else { + final IndexSchema schema = searcher.getSchema(); + final String idStr = schema.printableUniqueKey(rootDoc); + if (idStr == null) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "[child] requires fl to include the ID"); + } + final BytesRef idBytes = schema.indexableUniqueKey(idStr); + segPrevRootId = getPrevRootGivenId(leafReaderContext, rootDocId, idBytes); } - final int segPrevRootId = segRootId==0? -1: segParentsBitSet.prevSetBit(segRootId - 1); // can return -1 and that's okay - if (segPrevRootId == (segRootId - 1)) { // doc has no children, return fast return; diff --git a/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformerFactory.java b/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformerFactory.java index f5e18542c9b..e841a59a798 100644 --- a/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformerFactory.java +++ b/solr/core/src/java/org/apache/solr/response/transform/ChildDocTransformerFactory.java @@ -41,7 +41,9 @@ import static org.apache.solr.schema.IndexSchema.NEST_PATH_FIELD_NAME; /** * Attaches all descendants (child documents) to each parent document. * - * The "parentFilter" parameter is mandatory if the schema is not of nest/hierarchy. + * Optionally you can provide a "parentFilter" param to designate which documents are the root + * documents (parent-most documents). Solr can figure this out on its own but you might want to + * specify it. * * Optionally you can provide a "childFilter" param to filter out which child documents should be returned and a * "limit" param which provides an option to specify the number of child documents @@ -96,10 +98,7 @@ public class ChildDocTransformerFactory extends TransformerFactory { // DocSet parentDocSet = req.getSearcher().getDocSet(parentFilterQuery); // then return BitSetProducer with custom BitSet impl accessing the docSet if (parentFilterStr == null) { - if (!buildHierarchy) { - throw new SolrException(ErrorCode.BAD_REQUEST, "Parent filter should be sent as parentFilter=filterCondition"); - } - parentsFilter = new QueryBitSetProducer(rootFilter); + parentsFilter = !buildHierarchy ? null : new QueryBitSetProducer(rootFilter); } else { if(buildHierarchy) { throw new SolrException(ErrorCode.BAD_REQUEST, "Parent filter should not be sent when the schema is nested"); @@ -137,7 +136,8 @@ public class ChildDocTransformerFactory extends TransformerFactory { int limit = params.getInt( "limit", 10 ); - return new ChildDocTransformer(field, parentsFilter, childDocSet, childSolrReturnFields, buildHierarchy, limit); + return new ChildDocTransformer(field, parentsFilter, childDocSet, childSolrReturnFields, + buildHierarchy, limit, req.getSchema().getUniqueKeyField().getName()); } private static Query parseQuery(String qstr, SolrQueryRequest req, String param) { diff --git a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java index c2f3579ba1e..d41f395b12f 100644 --- a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java +++ b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java @@ -47,6 +47,7 @@ import org.apache.lucene.analysis.DelegatingAnalyzerWrapper; import org.apache.lucene.index.IndexableField; import org.apache.lucene.queries.payloads.PayloadDecoder; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Version; import org.apache.solr.common.ConfigNode; import org.apache.solr.common.MapSerializable; @@ -359,6 +360,11 @@ public class IndexSchema { } } + /** Given a readable/printable uniqueKey value, return an indexable version */ + public BytesRef indexableUniqueKey(String idStr) { + return new BytesRef(uniqueKeyFieldType.toInternal(idStr)); + } + private SchemaField getIndexedField(String fname) { SchemaField f = getFields().get(fname); if (f==null) { diff --git a/solr/core/src/test/org/apache/solr/response/transform/TestChildDocTransformer.java b/solr/core/src/test/org/apache/solr/response/transform/TestChildDocTransformer.java index 1a13a243103..48f2816d22e 100644 --- a/solr/core/src/test/org/apache/solr/response/transform/TestChildDocTransformer.java +++ b/solr/core/src/test/org/apache/solr/response/transform/TestChildDocTransformer.java @@ -96,13 +96,14 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 { "/response/result/doc[1]/doc[2]/str[@name='id']='5'"}; assertQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", - "fl", "*,[child parentFilter=\"subject:parentDocument\"]"), test1); + "fl", "*,[child]"), test1); + + // shows parentFilter specified (not necessary any more) and also child + assertQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", + "fl", "id, subject,[child childFilter=\"title:foo\"]"), test2); assertQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", - "fl", "id, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:foo\"]"), test2); - - assertQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", - "fl", "id, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:bar\" limit=2]"), test3); + "fl", "id, subject,[child childFilter=\"title:bar\" limit=2]"), test3); SolrException e = expectThrows(SolrException.class, () -> { h.query(req("q", "*:*", "fq", "subject:\"parentDocument\" ", @@ -240,13 +241,13 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 { }; assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", - "fl", "*,[child parentFilter=\"subject:parentDocument\"]"), test1); + "fl", "*,[child]"), test1); assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", - "fl", "id, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:foo\"]"), test2); + "fl", "id, subject,[child childFilter=\"title:foo\"]"), test2); assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", - "fl", "id, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:bar\" limit=3]"), test3); + "fl", "id, subject,[child childFilter=\"title:bar\" limit=3]"), test3); } private void testChildDocNonStoredDVFields() throws Exception { @@ -271,26 +272,26 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 { }; assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", - "fl", "*,[child parentFilter=\"subject:parentDocument\"]"), test1); + "fl", "*,[child]"), test1); assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", - "fl", "intDvoDefault, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:foo\"]"), test2); + "fl", "intDvoDefault, subject,[child childFilter=\"title:foo\"]"), test2); assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", - "fl", "intDvoDefault, subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:bar\" limit=2]"), test3); + "fl", "intDvoDefault, subject,[child childFilter=\"title:bar\" limit=2]"), test3); } private void testChildReturnFields() throws Exception { assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ", - "fl", "*,[child parentFilter=\"subject:parentDocument\" fl=\"intDvoDefault,child_fl:[value v='child_fl_test']\"]"), + "fl", "*,[child fl=\"intDvoDefault,child_fl:[value v='child_fl_test']\"]"), "/response/docs/[0]/intDefault==42", "/response/docs/[0]/_childDocuments_/[0]/intDvoDefault==42", "/response/docs/[0]/_childDocuments_/[0]/child_fl=='child_fl_test'"); try(SolrQueryRequest req = req("q", "*:*", "fq", "subject:\"parentDocument\" ", - "fl", "intDefault,[child parentFilter=\"subject:parentDocument\" fl=\"intDvoDefault, [docid]\"]")) { + "fl", "intDefault,[child fl=\"intDvoDefault, [docid]\"]")) { BasicResultContext res = (BasicResultContext) h.queryAndResponse("/select", req).getResponse(); Iterator docsStreamer = res.getProcessedDocuments(); while (docsStreamer.hasNext()) { @@ -401,6 +402,14 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 { "fl", "id, cat, title, [child childFilter='cat:childDocument' parentFilter=\"subject:parentDocument\"]"), tests); + // shows if parentFilter matches all docs, then there are effectively no child docs + assertJQ(req("q", "*:*", + "sort", "id asc", + "fq", "subject:\"parentDocument\" ", + "fl", "id,[child childFilter='cat:childDocument' parentFilter=\"*:*\"]"), + "/response==" + + "{'numFound':2,'start':0,'numFoundExact':true,'docs':[{'id':'1'},{'id':'4'}]}"); + } private void testSubQueryParentFilterJSON() throws Exception { @@ -451,13 +460,13 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 { assertQ(req("q", "*:*", "sort", "id asc", "fq", "subject:\"parentDocument\" ", - "fl", "*,[child childFilter='cat:childDocument' parentFilter=\"subject:parentDocument\"]"), + "fl", "*,[child childFilter='cat:childDocument']"), tests); assertQ(req("q", "*:*", "sort", "id asc", "fq", "subject:\"parentDocument\" ", - "fl", "id, cat, title, [child childFilter='cat:childDocument' parentFilter=\"subject:parentDocument\"]"), + "fl", "id, cat, title, [child childFilter='cat:childDocument']"), tests); } diff --git a/solr/solr-ref-guide/src/transforming-result-documents.adoc b/solr/solr-ref-guide/src/transforming-result-documents.adoc index 739409704b4..e1f2c5b42eb 100644 --- a/solr/solr-ref-guide/src/transforming-result-documents.adoc +++ b/solr/solr-ref-guide/src/transforming-result-documents.adoc @@ -132,14 +132,11 @@ Note that this transformer can be used even when the query used to match the res [source,plain] ---- -q=book_title:Solr&fl=id,[child parentFilter=doc_type:book childFilter=doc_type:chapter limit=100] +q=book_title:Solr&fl=id,[child childFilter=doc_type:chapter limit=100] ---- If the documents involved include a `\_nest_path_` field, then it is used to re-create the hierarchical structure of the descendent documents using the original psuedo-field names the documents were indexed with, otherwise the descendent documents are returned as a flat list of <>. -`parentFilter`:: -When using a schema that does _not_ include the `\_nest_path_` field, this parameter is mandatory, and serves the same purpose as the `of`/`which` parms in `{!child}`/`{!parent}` query parsers: to identify the set of "all parents" for the purpose of identifying the begining & end of each nested document block. *When a schema _does_ include a `\_nest_path_` field, this parameter is prohibited.* - `childFilter`:: A query to filter which child documents should be included. This can be particularly useful when you have multiple levels of hierarchical documents. The default is all children. @@ -151,6 +148,12 @@ The field list which the transformer is to return. The default is the top level + There is a further limitation in which the fields here should be a subset of those specified by the top level `fl` parameter. +`parentFilter`:: +Serves the same purpose as the `of`/`which` params in `{!child}`/`{!parent}` query parsers: to +identify the set of "all parents" for the purpose of identifying the beginning & end of each +nested document block. This recently became fully optional and appears to be obsolete. +It is likely to be removed in a future Solr release, so _if you find it has some use, let the +project know!_ [TIP] ====