SOLR-12519: child doc transformer can now produce a nested structure.

Fixed SolrDocument's confusion of field-attached child documents in addField()
Fixed AtomicUpdateDocumentMerger's confusion of field-attached child documents in isAtomicUpdate()
This commit is contained in:
David Smiley 2018-08-29 10:02:09 -04:00
parent 545b0dcd04
commit 5a0e7a615a
10 changed files with 796 additions and 133 deletions

View File

@ -158,6 +158,10 @@ New Features
* SOLR-12655: Add Korean morphological analyzer ("nori") to default distribution. This also adds examples
for configuration in Solr's schema. (Uwe Schindler)
* SOLR-12519: The [child] transformer now returns a nested child doc structure (children are attached as fields when
they were indexed that way), provided the schema is enabled for nested documents. This is part of a broader enhancement of nested docs.
(Moshe Bla, David Smiley)
Bug Fixes
----------------------

View File

@ -0,0 +1,245 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.response.transform;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Multimap;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.util.BitSet;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.response.transform.ChildDocTransformerFactory.NUM_SEP_CHAR;
import static org.apache.solr.response.transform.ChildDocTransformerFactory.PATH_SEP_CHAR;
import static org.apache.solr.schema.IndexSchema.NEST_PATH_FIELD_NAME;
class ChildDocTransformer extends DocTransformer {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final String ANON_CHILD_KEY = "_childDocuments_";
private final String name;
private final BitSetProducer parentsFilter;
private final DocSet childDocSet;
private final int limit;
private final boolean isNestedSchema;
//TODO ought to be provided/configurable
private final SolrReturnFields childReturnFields = new SolrReturnFields();
/**
 * Creates a transformer that attaches child documents to each transformed document.
 *
 * @param name           the name reported by {@link #getName()}
 * @param parentsFilter  produces the per-segment bit set used to find the root doc that precedes the transformed doc
 * @param childDocSet    the docs accepted as children; {@code null} means every doc in the block is accepted
 * @param isNestedSchema whether nest paths are looked up; when {@code false} children are attached anonymously
 * @param limit          maximum number of children loaded per transformed doc; {@code -1} disables the limit
 */
ChildDocTransformer(String name, BitSetProducer parentsFilter, DocSet childDocSet, boolean isNestedSchema, int limit) {
  this.name = name;
  this.parentsFilter = parentsFilter;
  this.childDocSet = childDocSet;
  this.isNestedSchema = isNestedSchema;
  this.limit = limit;
}
/** Returns the name this transformer was constructed with. */
@Override
public String getName() {
  return this.name;
}
/**
 * Attaches the matching descendants of {@code rootDoc} to it.
 *
 * Walks the doc ids between the previous set bit of the parents bit set (exclusive) and {@code rootDocId}
 * (exclusive). A doc is loaded when it is the parent of an already loaded doc, when there is no child doc set,
 * or when the child doc set contains it. Loaded docs are grouped by their parent path until that parent is
 * reached, and whatever remains at the end is attached to {@code rootDoc}.
 *
 * If an {@link IOException} occurs it is logged as a warning and an error message string is stored on
 * {@code rootDoc} under this transformer's name; the exception is not rethrown.
 *
 * @param rootDoc   the document being transformed (not necessarily the root of its block)
 * @param rootDocId the index-wide doc id of {@code rootDoc}
 */
@Override
public void transform(SolrDocument rootDoc, int rootDocId) {
// note: this algorithm works both when we have _nest_path_ and when we don't!
try {
// lookup what the *previous* rootDocId is, and figure which segment this is
final SolrIndexSearcher searcher = context.getSearcher();
final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
final int seg = ReaderUtil.subIndex(rootDocId, leaves);
final LeafReaderContext leafReaderContext = leaves.get(seg);
final int segBaseId = leafReaderContext.docBase;
final int segRootId = rootDocId - segBaseId;
final BitSet segParentsBitSet = parentsFilter.getBitSet(leafReaderContext);
final int segPrevRootId = segRootId==0? -1: segParentsBitSet.prevSetBit(segRootId - 1); // can return -1 and that's okay
if (segPrevRootId == (segRootId - 1)) {
// doc has no children, return fast
return;
}
// we'll need this soon...
final SortedDocValues segPathDocValues = DocValues.getSorted(leafReaderContext.reader(), NEST_PATH_FIELD_NAME);
// passing a different SortedDocValues obj since the child documents which come after are of smaller docIDs,
// and the iterator can not be reversed.
// The root doc is the input document to be transformed, and is not necessarily the root doc of the block of docs.
final String rootDocPath = getPathByDocId(segRootId, DocValues.getSorted(leafReaderContext.reader(), NEST_PATH_FIELD_NAME));
// the key in the Map is the path of the parent of the pending docs (null for direct children of the root),
// while the key in the intermediate MultiMap is the last path segment (label) of each pending child doc
final Map<String, Multimap<String, SolrDocument>> pendingParentPathsToChildren = new HashMap<>();
final int firstChildId = segBaseId + segPrevRootId + 1;
int matches = 0;
// Loop each child ID up to the parent (exclusive).
for (int docId = firstChildId; docId < rootDocId; ++docId) {
// get the path. (note will default to ANON_CHILD_KEY if schema is not nested or empty string if blank)
final String fullDocPath = getPathByDocId(docId - segBaseId, segPathDocValues);
if (isNestedSchema && !fullDocPath.startsWith(rootDocPath)) {
// is not a descendant of the transformed doc; skip it.
continue;
}
// Is this doc the parent of docs we've already seen (its path is a pending parent path)?
boolean isAncestor = pendingParentPathsToChildren.containsKey(fullDocPath);
// Do we need to do anything with this doc (either ancestor or matched the child query)
if (isAncestor || childDocSet == null || childDocSet.exists(docId)) {
// If we reached the limit, only add if it's an ancestor
if (limit != -1 && matches >= limit && !isAncestor) {
continue;
}
++matches; // note: includes ancestors that are not necessarily in childDocSet
// load the doc
SolrDocument doc = searcher.getDocFetcher().solrDoc(docId, childReturnFields);
if (isAncestor) {
// if this path has pending child docs, add them.
addChildrenToParent(doc, pendingParentPathsToChildren.remove(fullDocPath)); // no longer pending
}
// get parent path
String parentDocPath = getParentPath(fullDocPath);
String lastPath = getLastPath(fullDocPath);
// put into pending:
// trim path if the doc was inside array, see trimLastPoundIfArray()
// e.g. toppings#1/ingredients#1 -> outer map key toppings#1
// -> inner MultiMap key ingredients
// or lonely#/lonelyGrandChild# -> outer map key lonely#
// -> inner MultiMap key lonelyGrandChild#
pendingParentPathsToChildren.computeIfAbsent(parentDocPath, x -> ArrayListMultimap.create())
.put(trimLastPoundIfArray(lastPath), doc); // multimap add (won't replace)
}
}
if (pendingParentPathsToChildren.isEmpty()) {
// no child docs matched the child filter; return fast.
return;
}
// only children of parent remain
assert pendingParentPathsToChildren.keySet().size() == 1;
// size == 1, so get the last remaining entry
addChildrenToParent(rootDoc, pendingParentPathsToChildren.values().iterator().next());
} catch (IOException e) {
//TODO DWS: reconsider this unusual error handling approach; shouldn't we rethrow?
log.warn("Could not fetch child documents", e);
rootDoc.put(getName(), "Could not fetch child documents");
}
}
/**
 * Attaches every group of children to {@code parent}, one label at a time.
 *
 * @param parent   the document receiving the children
 * @param children child documents grouped by label
 */
private static void addChildrenToParent(SolrDocument parent, Multimap<String, SolrDocument> children) {
  children.keySet().forEach(label -> addChildrenToParent(parent, children.get(label), label));
}
/**
 * Attaches {@code children} to {@code parent} under the label derived from {@code cDocsPath}.
 *
 * <ul>
 *   <li>If the label is {@link #ANON_CHILD_KEY} the children are added as anonymous child documents.</li>
 *   <li>If the label contains no pound sign (nothing to trim) and the parent has no such field yet,
 *       the children are set as a list under that field.</li>
 *   <li>Otherwise only the first child is set as a single value under the trimmed label.</li>
 * </ul>
 *
 * @param parent    the document receiving the children
 * @param children  the child documents sharing the label; expected to be non-empty
 * @param cDocsPath the label of the children, possibly ending with a pound sign
 */
private static void addChildrenToParent(SolrDocument parent, Collection<SolrDocument> children, String cDocsPath) {
  // no paths: the children carry no relation to a specific field of the parent
  if (cDocsPath.equals(ANON_CHILD_KEY)) {
    parent.addChildDocuments(children);
    return;
  }
  // depending on the label, add to the parent at the right key/label
  final String trimmedPath = trimLastPound(cDocsPath);
  // A label with nothing to trim denotes an array of child docs. Compare by value rather than by
  // reference so this does not depend on trimLastPound returning the very same String instance.
  final boolean isArray = trimmedPath.equals(cDocsPath);
  if (isArray && !parent.containsKey(trimmedPath)) {
    parent.setField(trimmedPath, new ArrayList<>(children));
    return;
  }
  // is single value; use the iterator instead of an unchecked raw cast to List
  parent.setField(trimmedPath, children.iterator().next());
}
/**
 * Returns the last segment of {@code path}: the text after the final path separator,
 * or {@code path} itself when it contains no separator.
 */
private static String getLastPath(String path) {
  final int sepPos = path.lastIndexOf(PATH_SEP_CHAR);
  return sepPos == -1 ? path : path.substring(sepPos + 1);
}
/**
 * Strips the array index suffix from a path segment, e.g. {@code toppings#1 -> toppings}.
 * A segment without a pound sign, or one that ends with the pound sign (a child doc that is
 * not in an array, e.g. {@code ingredients#}), is returned unchanged.
 */
private static String trimLastPoundIfArray(String path) {
  final int poundPos = path.lastIndexOf(NUM_SEP_CHAR);
  final boolean hasIndexSuffix = poundPos != -1 && poundPos != path.length() - 1;
  if (!hasIndexSuffix) {
    return path;
  }
  return path.substring(0, poundPos);
}
/**
 * Removes the last pound sign and everything after it, e.g. {@code toppings#1 -> toppings}
 * and {@code lonely# -> lonely}. A path without a pound sign is returned unchanged.
 */
private static String trimLastPound(String path) {
  // use the shared separator constant, as trimLastPoundIfArray does, instead of a literal '#'
  final int poundPos = path.lastIndexOf(NUM_SEP_CHAR);
  return poundPos == -1 ? path : path.substring(0, poundPos);
}
/**
 * Returns the path of the parent of the document at {@code currDocPath}, i.e. the path with its
 * last segment (everything from the final path separator on) removed.
 * A path without any separator has no parent path and yields {@code null}.
 */
private static String getParentPath(String currDocPath) {
  final int sepPos = currDocPath.lastIndexOf(PATH_SEP_CHAR);
  if (sepPos == -1) {
    // single-segment path: special value meaning "no parent path"
    return null;
  }
  return currDocPath.substring(0, sepPos);
}
/**
 * Looks up the nest path of a document.
 * Returns {@link #ANON_CHILD_KEY} when the schema is not nested, and the empty string when the
 * doc values have no value for the document.
 *
 * @param segDocId         the segment-local doc id
 * @param segPathDocValues the nest path doc values of that segment; must not be positioned past {@code segDocId}
 */
private String getPathByDocId(int segDocId, SortedDocValues segPathDocValues) throws IOException {
  if (!isNestedSchema) {
    return ANON_CHILD_KEY;
  }
  // sanity check only: the doc values iterator cannot move backwards
  final int currentDocId = segPathDocValues.docID();
  final int numToAdvance = currentDocId == -1 ? segDocId : segDocId - currentDocId;
  assert numToAdvance >= 0;
  if (segPathDocValues.advanceExact(segDocId)) {
    return segPathDocValues.binaryValue().utf8ToString();
  }
  return "";
}
}

View File

@ -17,39 +17,31 @@
package org.apache.solr.response.transform;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.search.join.ToChildBlockJoinQuery;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.DocsStreamer;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SolrDocumentFetcher;
import org.apache.solr.search.SolrReturnFields;
import org.apache.solr.search.SyntaxError;
import static org.apache.solr.schema.IndexSchema.NEST_PATH_FIELD_NAME;
/**
* Attaches all descendants (child documents) to each parent document.
*
* @since solr 4.9
* The "parentFilter" parameter is mandatory if the schema is not of nest/hierarchy.
*
* This transformer returns all descendants of each parent document in a flat list nested inside the parent document.
*
*
* The "parentFilter" parameter is mandatory.
* Optionally you can provide a "childFilter" param to filter out which child documents should be returned and a
* "limit" param which provides an option to specify the number of child documents
* to be returned per parent document. By default it's set to 10.
@ -58,121 +50,92 @@ import org.apache.solr.search.SyntaxError;
* [child parentFilter="fieldName:fieldValue"]
* [child parentFilter="fieldName:fieldValue" childFilter="fieldName:fieldValue"]
* [child parentFilter="fieldName:fieldValue" childFilter="fieldName:fieldValue" limit=20]
*
* @since solr 4.9
*/
public class ChildDocTransformerFactory extends TransformerFactory {
static final char PATH_SEP_CHAR = '/';
static final char NUM_SEP_CHAR = '#';
private static final BooleanQuery rootFilter = new BooleanQuery.Builder()
.add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST))
.add(new BooleanClause(new DocValuesFieldExistsQuery(NEST_PATH_FIELD_NAME), BooleanClause.Occur.MUST_NOT)).build();
@Override
public DocTransformer create(String field, SolrParams params, SolrQueryRequest req) {
SchemaField uniqueKeyField = req.getSchema().getUniqueKeyField();
if(uniqueKeyField == null) {
if (uniqueKeyField == null) {
throw new SolrException( ErrorCode.BAD_REQUEST,
" ChildDocTransformer requires the schema to have a uniqueKeyField." );
}
// Do we build a hierarchy or flat list of child docs (attached anonymously)?
boolean buildHierarchy = req.getSchema().hasExplicitField(NEST_PATH_FIELD_NAME);
String parentFilter = params.get( "parentFilter" );
if( parentFilter == null ) {
throw new SolrException( ErrorCode.BAD_REQUEST, "Parent filter should be sent as parentFilter=filterCondition" );
}
String childFilter = params.get( "childFilter" );
int limit = params.getInt( "limit", 10 );
BitSetProducer parentsFilter = null;
try {
Query parentFilterQuery = QParser.getParser( parentFilter, req).getQuery();
//TODO shouldn't we try to use the Solr filter cache, and then ideally implement
String parentFilterStr = params.get( "parentFilter" );
BitSetProducer parentsFilter;
// TODO reuse org.apache.solr.search.join.BlockJoinParentQParser.getCachedFilter (uses a cache)
// TODO shouldn't we try to use the Solr filter cache, and then ideally implement
// BitSetProducer over that?
// DocSet parentDocSet = req.getSearcher().getDocSet(parentFilterQuery);
// then return BitSetProducer with custom BitSet impl accessing the docSet
parentsFilter = new QueryBitSetProducer(parentFilterQuery);
} catch (SyntaxError syntaxError) {
throw new SolrException( ErrorCode.BAD_REQUEST, "Failed to create correct parent filter query" );
if (parentFilterStr == null) {
if (!buildHierarchy) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Parent filter should be sent as parentFilter=filterCondition");
}
parentsFilter = new QueryBitSetProducer(rootFilter);
} else {
if(buildHierarchy) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Parent filter should not be sent when the schema is nested");
}
parentsFilter = new QueryBitSetProducer(parseQuery(parentFilterStr, req, "parentFilter"));
}
Query childFilterQuery = null;
if(childFilter != null) {
String childFilterStr = params.get( "childFilter" );
DocSet childDocSet;
if (childFilterStr == null) {
childDocSet = null;
} else {
if (buildHierarchy) {
childFilterStr = processPathHierarchyQueryString(childFilterStr);
}
Query childFilter = parseQuery(childFilterStr, req, "childFilter");
try {
childFilterQuery = QParser.getParser( childFilter, req).getQuery();
} catch (SyntaxError syntaxError) {
throw new SolrException( ErrorCode.BAD_REQUEST, "Failed to create correct child filter query" );
}
}
return new ChildDocTransformer( field, parentsFilter, uniqueKeyField, req.getSchema(), childFilterQuery, limit);
}
}
class ChildDocTransformer extends DocTransformer {
private final String name;
private final SchemaField idField;
private final IndexSchema schema;
private BitSetProducer parentsFilter;
private Query childFilterQuery;
private int limit;
public ChildDocTransformer( String name, final BitSetProducer parentsFilter,
final SchemaField idField, IndexSchema schema,
final Query childFilterQuery, int limit) {
this.name = name;
this.idField = idField;
this.schema = schema;
this.parentsFilter = parentsFilter;
this.childFilterQuery = childFilterQuery;
this.limit = limit;
}
@Override
public String getName() {
return name;
}
@Override
public String[] getExtraRequestFields() {
// we always need the idField (of the parent) in order to fill out it's children
return new String[] { idField.getName() };
}
@Override
public void transform(SolrDocument doc, int docid) {
FieldType idFt = idField.getType();
Object parentIdField = doc.getFirstValue(idField.getName());
String parentIdExt = parentIdField instanceof IndexableField
? idFt.toExternal((IndexableField)parentIdField)
: parentIdField.toString();
try {
Query parentQuery = idFt.getFieldQuery(null, idField, parentIdExt);
Query query = new ToChildBlockJoinQuery(parentQuery, parentsFilter);
DocList children = context.getSearcher().getDocList(query, childFilterQuery, new Sort(), 0, limit);
if(children.matches() > 0) {
SolrDocumentFetcher docFetcher = context.getSearcher().getDocFetcher();
Set<String> dvFieldsToReturn = docFetcher.getNonStoredDVs(true);
boolean shouldDecorateWithDVs = dvFieldsToReturn.size() > 0;
DocIterator i = children.iterator();
while(i.hasNext()) {
Integer childDocNum = i.next();
Document childDoc = context.getSearcher().doc(childDocNum);
// TODO: future enhancement...
// support an fl local param in the transformer, which is used to build
// a private ReturnFields instance that we use to prune unwanted field
// names from solrChildDoc
SolrDocument solrChildDoc = DocsStreamer.convertLuceneDocToSolrDoc(childDoc, schema,
new SolrReturnFields());
if (shouldDecorateWithDVs) {
docFetcher.decorateDocValueFields(solrChildDoc, childDocNum, dvFieldsToReturn);
}
doc.addChildDocument(solrChildDoc);
}
}
childDocSet = req.getSearcher().getDocSet(childFilter);
} catch (IOException e) {
doc.put(name, "Could not fetch child Documents");
throw new SolrException(ErrorCode.SERVER_ERROR, e);
}
}
int limit = params.getInt( "limit", 10 );
return new ChildDocTransformer(field, parentsFilter, childDocSet, buildHierarchy, limit);
}
/**
 * Parses a query string taken from one of this transformer's local params.
 *
 * @param qstr  the query string to parse
 * @param req   the current request, used to obtain the parser
 * @param param the name of the param the string came from; only used in the error message
 * @return the parsed query
 * @throws SolrException with {@code BAD_REQUEST} if the string cannot be parsed; the original
 *                       {@link SyntaxError} is kept as the cause
 */
private static Query parseQuery(String qstr, SolrQueryRequest req, String param) {
  try {
    return QParser.getParser(qstr, req).getQuery();
  } catch (SyntaxError syntaxError) {
    // keep the cause so the underlying parse problem is not lost
    throw new SolrException(ErrorCode.BAD_REQUEST, "Failed to parse '" + param + "' param.", syntaxError);
  }
}
// NOTE: THIS FEATURE IS PRESENTLY EXPERIMENTAL; WAIT TO SEE IT IN THE REF GUIDE. FINAL SYNTAX IS TBD.
/**
 * Rewrites a child filter that starts with a nest path into a query string that also restricts the
 * nest path field. Everything up to and including the last path separator before the first colon
 * is treated as the path; it is passed through {@code ClientUtils.escapeQueryChars} (not quoted)
 * and required on the nest path field, and the rest is required as a parenthesized clause, i.e.
 * {@code toppings/ingredients/name_s:cocoa} becomes
 * {@code +_nest_path_:<escaped "toppings/ingredients/"> +(name_s:cocoa)}.
 * The input is returned unchanged when it has no colon after its first character or no path
 * separator at or before the first colon.
 */
protected static String processPathHierarchyQueryString(String queryString) {
  final int colonPos = queryString.indexOf(':');
  if (colonPos <= 0) {
    return queryString; // give up
  }
  final int sepPos = queryString.lastIndexOf(PATH_SEP_CHAR, colonPos);
  if (sepPos < 0) {
    return queryString;
  }
  final int splitAt = sepPos + 1;
  final String path = queryString.substring(0, splitAt);
  final String remaining = queryString.substring(splitAt);
  return new StringBuilder()
      .append("+").append(NEST_PATH_FIELD_NAME).append(":").append(ClientUtils.escapeQueryChars(path))
      .append(" +(").append(remaining).append(")")
      .toString();
}
}

View File

@ -32,6 +32,7 @@ import java.util.regex.Pattern;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.solr.common.SolrDocumentBase;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrInputDocument;
@ -73,7 +74,8 @@ public class AtomicUpdateDocumentMerger {
public static boolean isAtomicUpdate(final AddUpdateCommand cmd) {
SolrInputDocument sdoc = cmd.getSolrInputDocument();
for (SolrInputField sif : sdoc.values()) {
if (sif.getValue() instanceof Map) {
Object val = sif.getValue();
if (val instanceof Map && !(val instanceof SolrDocumentBase)) {
return true;
}
}

View File

@ -0,0 +1,65 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="nested-docs" version="1.6">
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
<field name="id_i" type="int" indexed="true" multiValued="false" docValues="true" stored="false" useDocValuesAsStored="false" />
<!-- copy id field as int -->
<copyField source="id" dest="id_i"/>
<!-- for versioning -->
<field name="_version_" type="long" indexed="false" stored="false" docValues="true"/>
<!-- points to the root document of a block of nested documents -->
<field name="_root_" type="string" indexed="true" stored="true"/>
<!-- required for NestedUpdateProcessor -->
<field name="_nest_parent_" type="string" indexed="true" stored="true"/>
<field name="_nest_path_" type="descendants_path" indexed="true" multiValued="false" docValues="true" stored="false" useDocValuesAsStored="false"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
<!-- Point Fields -->
<fieldType name="int" class="solr.IntPointField" docValues="true"/>
<fieldType name="long" class="solr.LongPointField" docValues="true"/>
<fieldType name="double" class="solr.DoublePointField" docValues="true"/>
<fieldType name="float" class="solr.FloatPointField" docValues="true"/>
<fieldType name="date" class="solr.DatePointField" docValues="true"/>
<!-- field type of the nest path: indexed as path-hierarchy tokens with the array index markers removed -->
<fieldType name="descendants_path" class="solr.SortableTextField">
  <analyzer type="index">
    <!-- char filter appends a trailing / to the path in the indexed form, e.g. toppings/ingredients turns to toppings/ingredients/ -->
    <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="(^.*.*$)" replacement="$0/"/>
    <!-- tokenize the path so path queries are optimized -->
    <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/>
    <!-- remove the # and the array index that follows it, e.g. toppings#1/ingredients#/ turns to toppings/ingredients/.
         The pattern matches only a # and the digits after it; a character class such as [#*\d] would also
         strip digits and asterisks that are part of a field name. -->
    <filter class="solr.PatternReplaceFilterFactory" pattern="#\d*" replace="all"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.KeywordTokenizerFactory"/>
  </analyzer>
</fieldType>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -565,9 +565,6 @@
<field name="_version_" type="long" indexed="false" stored="false" docValues="true"/>
<!-- points to the root document of a block of nested documents -->
<field name="_root_" type="string" indexed="true" stored="true"/>
<!-- required for NestedUpdateProcessor -->
<field name="_nest_parent_" type="string" indexed="true" stored="true"/>
<field name="_nest_path_" type="string" indexed="true" stored="true"/>
<field name="multi_int_with_docvals" type="tint" multiValued="true" docValues="true" indexed="false"/>

View File

@ -30,7 +30,7 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig.xml","schema.xml");
initCore("solrconfig.xml","schema.xml"); // *not* the "nest" schema version
}
@After
@ -63,7 +63,7 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 {
testChildDocNonStoredDVFields();
}
private void testChildDoctransformerXML() {
private void testChildDoctransformerXML() throws Exception {
String test1[] = new String[] {
"//*[@numFound='1']",
"/response/result/doc[1]/doc[1]/str[@name='id']='2'" ,
@ -81,8 +81,9 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 {
String test3[] = new String[] {
"//*[@numFound='1']",
"count(/response/result/doc[1]/doc)=2",
"/response/result/doc[1]/doc[1]/str[@name='id']='3'" ,
"/response/result/doc[1]/doc[2]/str[@name='id']='5'" };
"/response/result/doc[1]/doc[2]/str[@name='id']='5'"};
@ -214,7 +215,7 @@ public class TestChildDocTransformer extends SolrTestCaseJ4 {
"fl", "subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:foo\"]"), test2);
assertJQ(req("q", "*:*", "fq", "subject:\"parentDocument\" ",
"fl", "subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:bar\" limit=2]"), test3);
"fl", "subject,[child parentFilter=\"subject:parentDocument\" childFilter=\"title:bar\" limit=3]"), test3);
}
private void testChildDocNonStoredDVFields() throws Exception {

View File

@ -0,0 +1,386 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.response.transform;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import com.google.common.collect.Iterables;
import org.apache.lucene.index.IndexableField;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.BasicResultContext;
import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestChildDocTransformerHierarchy extends SolrTestCaseJ4 {
private static AtomicInteger idCounter = new AtomicInteger();
private static final String[] types = {"donut", "cake"};
private static final String[] ingredients = {"flour", "cocoa", "vanilla"};
private static final Iterator<String> ingredientsCycler = Iterables.cycle(ingredients).iterator();
private static final String[] names = {"Yaz", "Jazz", "Costa"};
private static final String[] fieldsToRemove = {"_nest_parent_", "_nest_path_", "_root_"};
private static final int sumOfDocsPerNestedDocument = 8;
private static final int numberOfDocsPerNestedTest = 10;
private static int firstTestedDocId = 0;
private static String fqToExcludeNonTestedDocs; // filter documents that were created for random segments to ensure the transformer works with multiple segments.
/**
 * Initializes the core with the "nest" schema and, at random, pre-populates the index with document
 * hierarchies spread over several segments so the transformer is exercised across segments.
 * Those extra documents are excluded from the tests through {@code fqToExcludeNonTestedDocs}.
 */
@BeforeClass
public static void beforeClass() throws Exception {
  initCore("solrconfig-update-processor-chains.xml", "schema-nest.xml"); // use "nest" schema
  final boolean useSegments = random().nextBoolean();
  if (useSegments) {
    // create random segments
    final int numOfDocs = 10;
    for (int i = 0; i < numOfDocs; ++i) {
      updateJ(generateDocHierarchy(i), params("update.chain", "nested"));
      if (random().nextBoolean()) {
        assertU(commit());
      }
    }
    assertU(commit());
  }
  // Record the id counter BEFORE building the filter: the filter must exclude the ids consumed by the
  // segment-filler documents above, not use the field's initial value of 0.
  // NOTE(review): the exclusive lower bound assumes ids are handed out after incrementing the counter - confirm.
  firstTestedDocId = idCounter.get();
  fqToExcludeNonTestedDocs = useSegments
      ? "{!frange l=" + firstTestedDocId + " incl=false}id_i"
      : "*:*";
}
/**
 * Deletes the documents matched by {@code fqToExcludeNonTestedDocs}, commits, and resets the id counter
 * to {@code firstTestedDocId} so every test starts from the same ids.
 */
@After
public void after() throws Exception {
assertU(delQ(fqToExcludeNonTestedDocs));
assertU(commit());
idCounter.set(firstTestedDocId); // reset idCounter
}
/**
 * Checks the [child] transformer without any parameters: every processed document must equal the full
 * nested template for its id, and the JSON response must contain the expected nested paths.
 */
@Test
public void testParentFilterJSON() throws Exception {
indexSampleData(numberOfDocsPerNestedTest);
// JSON paths expected in the first returned document
String[] tests = new String[] {
"/response/docs/[0]/type_s==donut",
"/response/docs/[0]/toppings/[0]/type_s==Regular",
"/response/docs/[0]/toppings/[1]/type_s==Chocolate",
"/response/docs/[0]/toppings/[0]/ingredients/[0]/name_s==cocoa",
"/response/docs/[0]/toppings/[1]/ingredients/[1]/name_s==cocoa",
"/response/docs/[0]/lonely/test_s==testing",
"/response/docs/[0]/lonely/lonelyGrandChild/test2_s==secondTest",
};
// compare each processed document, as a string, with the expected template for its id
try(SolrQueryRequest req = req("q", "type_s:donut", "sort", "id asc",
"fl", "*, _nest_path_, [child]", "fq", fqToExcludeNonTestedDocs)) {
BasicResultContext res = (BasicResultContext) h.queryAndResponse("/select", req).getResponse();
Iterator<SolrDocument> docsStreamer = res.getProcessedDocuments();
while (docsStreamer.hasNext()) {
SolrDocument doc = docsStreamer.next();
cleanSolrDocumentFields(doc);
int currDocId = Integer.parseInt((doc.getFirstValue("id")).toString());
assertEquals("queried docs are not equal to expected output for id: " + currDocId, fullNestedDocTemplate(currDocId), doc.toString());
}
}
// same query, checked through the JSON response
assertJQ(req("q", "type_s:donut",
"sort", "id asc",
"fl", "*, _nest_path_, [child]",
"fq", fqToExcludeNonTestedDocs),
tests);
}
/**
 * Checks the "limit" param of the [child] transformer: first combined with a child filter on the nest
 * path, then on its own.
 */
@Test
public void testParentFilterLimitJSON() throws Exception {
indexSampleData(numberOfDocsPerNestedTest);
// limit=1 with a child filter on the toppings path: exactly one toppings child, none attached anonymously
try(SolrQueryRequest req = req("q", "type_s:donut", "sort", "id asc", "fl", "id, type_s, toppings, _nest_path_, [child childFilter='_nest_path_:\"toppings/\"' limit=1]",
"fq", fqToExcludeNonTestedDocs)) {
BasicResultContext res = (BasicResultContext) h.queryAndResponse("/select", req).getResponse();
Iterator<SolrDocument> docsStreamer = res.getProcessedDocuments();
while (docsStreamer.hasNext()) {
SolrDocument doc = docsStreamer.next();
cleanSolrDocumentFields(doc);
assertFalse("root doc should not have anonymous child docs", doc.hasChildDocuments());
assertEquals("should only have 1 child doc", 1, doc.getFieldValues("toppings").size());
}
}
// limit=1 without a child filter
assertJQ(req("q", "type_s:donut",
"sort", "id asc",
"fl", "*, [child limit=1]",
"fq", fqToExcludeNonTestedDocs),
"/response/docs/[0]/type_s==donut",
"/response/docs/[0]/lonely/test_s==testing",
"/response/docs/[0]/lonely/lonelyGrandChild/test2_s==secondTest",
// "!" (negate): don't find toppings. The "limit" kept us from reaching these, which follow lonely.
"!/response/docs/[0]/toppings/[0]/type_s==Regular"
);
}
/**
 * Checks a path-style child filter ({@code toppings/type_s:Regular}) together with the "limit" param.
 */
@Test
public void testChildFilterLimitJSON() throws Exception {
indexSampleData(numberOfDocsPerNestedTest);
// limit=1: each document must have exactly one toppings child and it must be the Regular one
try(SolrQueryRequest req = req("q", "type_s:donut", "sort", "id asc", "fl", "*, _nest_path_, " +
"[child limit='1' childFilter='toppings/type_s:Regular']", "fq", fqToExcludeNonTestedDocs)) {
BasicResultContext res = (BasicResultContext) h.queryAndResponse("/select", req).getResponse();
Iterator<SolrDocument> docsStreamer = res.getProcessedDocuments();
while (docsStreamer.hasNext()) {
SolrDocument doc = docsStreamer.next();
cleanSolrDocumentFields(doc);
assertFalse("root doc should not have anonymous child docs", doc.hasChildDocuments());
assertEquals("should only have 1 child doc", 1, doc.getFieldValues("toppings").size());
assertEquals("should be of type_s:Regular", "Regular", ((SolrDocument) doc.getFirstValue("toppings")).getFieldValue("type_s"));
}
}
// limit=10: the Regular topping is expected as the first toppings entry in the JSON response
assertJQ(req("q", "type_s:donut",
"sort", "id asc",
"fl", "id, type_s, toppings, _nest_path_, [child limit='10' childFilter='toppings/type_s:Regular']",
"fq", fqToExcludeNonTestedDocs),
"/response/docs/[0]/type_s==donut",
"/response/docs/[0]/toppings/[0]/type_s==Regular");
}
/**
 * Indexes two sample hierarchies and queries the nest path field directly, once with a
 * leading-wildcard query and once with a quoted phrase query. Both requests are expected to
 * return the same four docs with the same nest path values.
 */
@Test
public void testExactPath() throws Exception {
  indexSampleData(2);

  final String[] expected = new String[] {
      "/response/numFound==4",
      "/response/docs/[0]/_nest_path_=='toppings#0'",
      "/response/docs/[1]/_nest_path_=='toppings#0'",
      "/response/docs/[2]/_nest_path_=='toppings#1'",
      "/response/docs/[3]/_nest_path_=='toppings#1'",
  };

  // leading wildcard form
  assertJQ(
      req("q", "_nest_path_:*toppings/",
          "sort", "_nest_path_ asc",
          "fl", "*, id_i, _nest_path_",
          "fq", fqToExcludeNonTestedDocs),
      expected);

  // quoted, required-clause form
  assertJQ(
      req("q", "+_nest_path_:\"toppings/\"",
          "sort", "_nest_path_ asc",
          "fl", "*, _nest_path_",
          "fq", fqToExcludeNonTestedDocs),
      expected);
}
/**
 * Checks that a childFilter addressing toppings/type_s:Regular yields a donut root doc whose
 * first "toppings" entry has type_s Regular in the JSON response.
 */
@Test
public void testChildFilterJSON() throws Exception {
  indexSampleData(numberOfDocsPerNestedTest);

  assertJQ(
      req("q", "type_s:donut",
          "sort", "id asc",
          "fl", "*,[child childFilter='toppings/type_s:Regular']",
          "fq", fqToExcludeNonTestedDocs),
      "/response/docs/[0]/type_s==donut",
      "/response/docs/[0]/toppings/[0]/type_s==Regular");
}
/**
 * Checks a childFilter that targets grandchildren (toppings/ingredients/name_s:cocoa).
 * Each processed root doc is cleaned and its toString() compared against
 * grandChildDocTemplate for that doc's id; the same filter is then verified through the
 * JSON response.
 */
@Test
public void testGrandChildFilterJSON() throws Exception {
  indexSampleData(numberOfDocsPerNestedTest);

  try (SolrQueryRequest req = req(
      "q", "type_s:donut",
      "sort", "id asc",
      "fl", "*,[child childFilter='toppings/ingredients/name_s:cocoa'],",
      "fq", fqToExcludeNonTestedDocs)) {
    BasicResultContext resultContext = (BasicResultContext) h.queryAndResponse("/select", req).getResponse();
    for (Iterator<SolrDocument> docs = resultContext.getProcessedDocuments(); docs.hasNext(); ) {
      SolrDocument rootDoc = docs.next();
      cleanSolrDocumentFields(rootDoc);
      final String idText = (rootDoc.getFirstValue("id")).toString();
      final int currDocId = Integer.parseInt(idText);
      assertEquals("queried docs are not equal to expected output for id: " + currDocId,
          grandChildDocTemplate(currDocId), rootDoc.toString());
    }
  }

  assertJQ(
      req("q", "type_s:donut",
          "sort", "id asc",
          "fl", "*,[child childFilter='toppings/ingredients/name_s:cocoa']",
          "fq", fqToExcludeNonTestedDocs),
      "/response/docs/[0]/type_s==donut",
      "/response/docs/[0]/toppings/[0]/ingredients/[0]/name_s==cocoa");
}
/**
 * Checks a childFilter that addresses a single-valued (non-array) child chain:
 * lonely -> lonelyGrandChild. The cake root doc is expected to expose both levels as
 * nested objects rather than arrays in the JSON response.
 */
@Test
public void testSingularChildFilterJSON() throws Exception {
  indexSampleData(numberOfDocsPerNestedTest);

  assertJQ(
      req("q", "type_s:cake",
          "sort", "id asc",
          "fl", "*,[child childFilter='lonely/lonelyGrandChild/test2_s:secondTest']",
          "fq", fqToExcludeNonTestedDocs),
      "/response/docs/[0]/type_s==cake",
      "/response/docs/[0]/lonely/test_s==testing",
      "/response/docs/[0]/lonely/lonelyGrandChild/test2_s==secondTest");
}
/**
 * Checks the [child] transformer when the docs matched by the main query are themselves
 * nested docs rather than roots: first a "lonely" doc (test_s:testing) with a childFilter,
 * then a Chocolate topping with no filter at all.
 */
@Test
public void testNonRootChildren() throws Exception {
  indexSampleData(numberOfDocsPerNestedTest);

  // matched doc is the "lonely" child; its own child must be attached beneath it
  final String[] lonelyExpectations = {
      "/response/docs/[0]/test_s==testing",
      "/response/docs/[0]/lonelyGrandChild/test2_s==secondTest"
  };
  assertJQ(
      req("q", "test_s:testing",
          "sort", "id asc",
          "fl", "*,[child childFilter='lonely/lonelyGrandChild/test2_s:secondTest']",
          "fq", fqToExcludeNonTestedDocs),
      lonelyExpectations);

  // matched doc is a Chocolate topping; both of its ingredients must be attached
  final String[] chocolateExpectations = {
      "/response/docs/[0]/type_s==Chocolate",
      "/response/docs/[0]/ingredients/[0]/name_s==cocoa",
      "/response/docs/[0]/ingredients/[1]/name_s==cocoa"
  };
  assertJQ(
      req("q", "type_s:Chocolate",
          "sort", "id asc",
          "fl", "*,[child]",
          "fq", fqToExcludeNonTestedDocs),
      chocolateExpectations);
}
/**
 * Expects a SolrException when a parentFilter parameter is passed to the [child]
 * transformer alongside a childFilter in this test's (nested) schema setup.
 */
@Test
public void testExceptionThrownWParentFilter() throws Exception {
  final String transformerFl =
      "*,[child childFilter='lonely/lonelyGrandChild/test2_s:secondTest' parentFilter='_nest_path_:\"lonely/\"']";

  expectThrows(SolrException.class,
      "Exception was not thrown when parentFilter param was passed to ChildDocTransformer using a nested schema",
      () -> {
        assertJQ(
            req("q", "test_s:testing",
                "sort", "id asc",
                "fl", transformerFl,
                "fq", fqToExcludeNonTestedDocs),
            "/response/docs/[0]/test_s==testing",
            "/response/docs/[0]/lonelyGrandChild/test2_s==secondTest");
      });
}
/**
 * Indexes a single "cake" doc that has no nested children and checks that the [child]
 * transformer with a childFilter still returns the root doc itself.
 */
@Test
public void testNoChildren() throws Exception {
  // The last member of "doc" must not be followed by a comma: a trailing comma is not valid
  // JSON and would only be accepted by a lenient parser.
  final String addDocWoChildren =
      "{\n" +
        "\"add\": {\n" +
          "\"doc\": {\n" +
            "\"id\": " + id() + ", \n" +
            "\"type_s\": \"cake\"\n" +
          "}\n" +
        "}\n" +
      "}";
  updateJ(addDocWoChildren, params("update.chain", "nested"));
  assertU(commit());

  assertJQ(req("q", "type_s:cake",
      "sort", "id asc",
      "fl", "*,[child childFilter='lonely/lonelyGrandChild/test2_s:secondTest']",
      "fq", fqToExcludeNonTestedDocs),
      "/response/docs/[0]/type_s==cake");
}
/**
 * Sends {@code numDocs} generated document hierarchies through the "nested" update chain
 * and then commits.
 *
 * @param numDocs number of root hierarchies to generate; hierarchy indexes run from 0
 */
private void indexSampleData(int numDocs) throws Exception {
  int docIndex = 0;
  while (docIndex < numDocs) {
    final String hierarchyJson = generateDocHierarchy(docIndex);
    updateJ(hierarchyJson, params("update.chain", "nested"));
    docIndex++;
  }
  assertU(commit());
}
/**
 * Advances the shared id counter and returns the new value.
 *
 * @return the incremented counter value
 */
private static int id() {
  final int nextId = idCounter.incrementAndGet();
  return nextId;
}
/**
 * Normalizes a document in place so that it can be compared against an expected string.
 * Every field named in {@code fieldsToRemove} is dropped; each remaining value (or each
 * element of a collection value) is passed through {@link #cleanIndexableField(Object)}
 * and written back under the same field name.
 *
 * @param input the document to clean; modified in place
 */
private static void cleanSolrDocumentFields(SolrDocument input) {
  for (String unwanted : fieldsToRemove) {
    input.removeFields(unwanted);
  }

  for (Map.Entry<String, Object> entry : input) {
    final Object rawValue = entry.getValue();
    final Object cleaned;
    if (rawValue instanceof Collection) {
      // multi-valued field: clean every element and store the results as a new list
      cleaned = ((Collection<?>) rawValue).stream()
          .map(element -> cleanIndexableField(element))
          .collect(Collectors.toList());
    } else {
      cleaned = cleanIndexableField(rawValue);
    }
    input.setField(entry.getKey(), cleaned);
  }
}
/**
 * Cleans a single field value.
 *
 * @param field the value to clean
 * @return the string value if {@code field} is an IndexableField; otherwise {@code field}
 *         itself, after cleaning it in place when it is a nested SolrDocument
 */
private static Object cleanIndexableField(Object field) {
  if (field instanceof IndexableField) {
    final IndexableField indexable = (IndexableField) field;
    return indexable.stringValue();
  }
  if (field instanceof SolrDocument) {
    // nested doc: clean recursively, then hand back the same instance
    cleanSolrDocumentFields((SolrDocument) field);
  }
  return field;
}
/**
 * Builds the expected {@code toString()} form of a cleaned root doc that carries only its
 * "toppings" children and their "ingredients" grandchildren, all ingredients named cocoa.
 *
 * @param id id of the root doc; nested doc ids are derived as fixed offsets from it
 * @return the expected string representation
 */
private static String grandChildDocTemplate(int id) {
  // the index of docs sent to solr in the AddUpdateCommand. e.g. first doc is 0
  final int docNum = (id - firstTestedDocId) / sumOfDocsPerNestedDocument;

  final String regularTopping =
      "SolrDocument{id=" + (id + 3) + ", type_s=Regular, "
          + "ingredients=[SolrDocument{id=" + (id + 4) + ", name_s=cocoa}]}";
  final String chocolateTopping =
      "SolrDocument{id=" + (id + 5) + ", type_s=Chocolate, "
          + "ingredients=[SolrDocument{id=" + (id + 6) + ", name_s=cocoa}, "
          + "SolrDocument{id=" + (id + 7) + ", name_s=cocoa}]}";

  return "SolrDocument{id=" + id
      + ", type_s=" + types[docNum % types.length]
      + ", name_s=" + names[docNum % names.length] + ", "
      + "toppings=[" + regularTopping + ", " + chocolateTopping + "]}";
}
/**
 * Builds the expected {@code toString()} form of a cleaned root doc with its complete
 * nested structure: the single-valued "lonely" chain plus the "toppings" list.
 * Note that for odd doc numbers this advances the shared {@code ingredientsCycler}.
 *
 * @param id id of the root doc; nested doc ids are derived as fixed offsets from it
 * @return the expected string representation
 */
private static String fullNestedDocTemplate(int id) {
  // the index of docs sent to solr in the AddUpdateCommand. e.g. first doc is 0
  final int docNum = (id - firstTestedDocId) / sumOfDocsPerNestedDocument;

  // even doc numbers use the fixed second ingredient; odd ones take the next from the cycler
  final String currIngredient;
  if (docNum % 2 == 0) {
    currIngredient = ingredients[1];
  } else {
    currIngredient = ingredientsCycler.next();
  }

  final String rootPrefix =
      "SolrDocument{id=" + id
          + ", type_s=" + types[docNum % types.length]
          + ", name_s=" + names[docNum % names.length] + ", ";
  final String lonelyChain =
      "lonely=SolrDocument{id=" + (id + 1) + ", test_s=testing, "
          + "lonelyGrandChild=SolrDocument{id=" + (id + 2) + ", test2_s=secondTest}}";
  final String regularTopping =
      "SolrDocument{id=" + (id + 3) + ", type_s=Regular, "
          + "ingredients=[SolrDocument{id=" + (id + 4) + ", name_s=" + currIngredient + "}]}";
  final String chocolateTopping =
      "SolrDocument{id=" + (id + 5) + ", type_s=Chocolate, "
          + "ingredients=[SolrDocument{id=" + (id + 6) + ", name_s=cocoa}, "
          + "SolrDocument{id=" + (id + 7) + ", name_s=cocoa}]}";

  return rootPrefix + lonelyChain + ", "
      + "toppings=[" + regularTopping + ", " + chocolateTopping + "]}";
}
/**
 * Generates the JSON "add" command for one sample hierarchy: a root doc with a
 * single-valued "lonely" child (which has its own "lonelyGrandChild"), and a "toppings"
 * list of two docs, each with an "ingredients" list. Ids are drawn from {@link #id()} in
 * document order, so one call consumes eight consecutive ids.
 * Note that for odd {@code i} this advances the shared {@code ingredientsCycler}.
 *
 * @param i zero-based index of the hierarchy; selects the root's type and name and whether
 *          the Regular topping's ingredient is fixed (even) or taken from the cycler (odd)
 * @return the JSON update command
 */
private static String generateDocHierarchy(int i) {
  boolean doubleIngredient = i % 2 == 0;
  String currIngredient = doubleIngredient ? ingredients[1]: ingredientsCycler.next();
  return "{\n" +
            "\"add\": {\n" +
              "\"doc\": {\n" +
                "\"id\": " + id() + ", \n" +
                "\"type_s\": \"" + types[i % types.length] + "\", \n" +
                "\"lonely\": {\"id\": " + id() + ", \"test_s\": \"testing\", \"lonelyGrandChild\": {\"id\": " + id() + ", \"test2_s\": \"secondTest\"}}, \n" +
                // name_s is quoted and followed by a comma so the payload is well-formed JSON
                // and does not depend on the parser tolerating bare strings / missing commas
                "\"name_s\": \"" + names[i % names.length] + "\", \n" +
                "\"toppings\": [ \n" +
                  "{\"id\": " + id() + ", \"type_s\":\"Regular\"," +
                    "\"ingredients\": [{\"id\": " + id() + "," +
                      "\"name_s\": \"" + currIngredient + "\"}]" +
                  "},\n" +
                  "{\"id\": " + id() + ", \"type_s\":\"Chocolate\"," +
                    "\"ingredients\": [{\"id\": " + id() + "," +
                      "\"name_s\": \"" + ingredients[1] + "\"}," +
                      "{\"id\": " + id() + ",\n" + "\"name_s\": \"" + ingredients[1] +"\"" +
                    "}]" +
                  "}]\n" +
              "}\n" +
            "}\n" +
          "}";
}
}

View File

@ -90,7 +90,7 @@ public class TestNestedUpdateProcessor extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-update-processor-chains.xml", "schema15.xml");
initCore("solrconfig-update-processor-chains.xml", "schema-nest.xml");
}
@Before
@ -107,8 +107,8 @@ public class TestNestedUpdateProcessor extends SolrTestCaseJ4 {
};
indexSampleData(jDoc);
assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":*/grandChild#*",
"fl","*",
assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":*/grandChild",
"fl","*, _nest_path_",
"sort","id desc",
"wt","json"),
tests);
@ -124,14 +124,14 @@ public class TestNestedUpdateProcessor extends SolrTestCaseJ4 {
};
indexSampleData(jDoc);
assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":children#?",
"fl","*",
assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":children/",
"fl","*, _nest_path_",
"sort","id asc",
"wt","json"),
childrenTests);
assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":anotherChildList#?",
"fl","*",
assertJQ(req("q", IndexSchema.NEST_PATH_FIELD_NAME + ":anotherChildList/",
"fl","*, _nest_path_",
"sort","id asc",
"wt","json"),
"/response/docs/[0]/id=='4'",

View File

@ -105,7 +105,7 @@ public class SolrDocument extends SolrDocumentBase<Object, SolrDocument> impleme
else if( value instanceof NamedList ) {
// nothing
}
else if( value instanceof Iterable ) {
else if( value instanceof Iterable && !(value instanceof SolrDocumentBase)) {
ArrayList<Object> lst = new ArrayList<>();
for( Object o : (Iterable)value ) {
lst.add( o );
@ -154,7 +154,7 @@ public class SolrDocument extends SolrDocumentBase<Object, SolrDocument> impleme
}
// Add the values to the collection
if( value instanceof Iterable ) {
if( value instanceof Iterable && !(value instanceof SolrDocumentBase)) {
for( Object o : (Iterable<Object>)value ) {
vals.add( o );
}