From 38292f791eaddb6e652b36200388b6a66486ae37 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Sat, 13 Jun 2015 22:32:48 +0000 Subject: [PATCH] SOLR-7676: nested docs facet domain switching git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1685340 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 7 ++ .../org/apache/solr/search/QueryContext.java | 8 +- .../apache/solr/search/SolrIndexSearcher.java | 19 +++ .../apache/solr/search/facet/BlockJoin.java | 77 ++++++++++++ .../apache/solr/search/facet/FacetQuery.java | 1 + .../solr/search/facet/FacetRequest.java | 115 +++++++++++++++++- .../solr/search/facet/TestJsonFacets.java | 76 ++++++++++++ 7 files changed, 295 insertions(+), 8 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/search/facet/BlockJoin.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 61ab8b84503..45ad1d7bbd3 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -116,6 +116,13 @@ New Features * SOLR-7458: Expose HDFS Block Locality Metrics via JMX (Mike Drob via Mark Miller) +* SOLR-7676: Faceting on nested objects / Block-join faceting with the new JSON Facet API. + Example: Assuming books with nested pages and an input domain of pages, the following + will switch the domain to books before faceting on the author field: + authors:{ type:terms, field:author, domain:{toParent:"type:book"} } + (yonik) + + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/search/QueryContext.java b/solr/core/src/java/org/apache/solr/search/QueryContext.java index 93f93fc7dd3..367c2ded2da 100644 --- a/solr/core/src/java/org/apache/solr/search/QueryContext.java +++ b/solr/core/src/java/org/apache/solr/search/QueryContext.java @@ -26,8 +26,10 @@ import org.apache.solr.common.SolrException; import org.apache.solr.core.SolrCore; import org.apache.solr.request.SolrRequestInfo; -/** - * Bridge between old style context and a real class +/* + * Bridge between old style context and a real class. + * This is currently slightly more heavy weight than necessary because of the need to inherit from IdentityHashMap rather than + * instantiate it on demand (and the need to put "searcher" in the map) * @lucene.experimental */ public class QueryContext extends IdentityHashMap implements Closeable { @@ -45,7 +47,7 @@ public class QueryContext extends IdentityHashMap implements Closeable { public QueryContext(IndexSearcher searcher) { this.searcher = searcher instanceof SolrIndexSearcher ? (SolrIndexSearcher)searcher : null; indexSearcher = searcher; - this.put("searcher", searcher); // see ValueSource.newContext() + this.put("searcher", searcher); // see ValueSource.newContext() // TODO: move check to "get"? } diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index f37af0a0d70..13c9f6fcddb 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -873,6 +873,25 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn getDocSet(query); } + public BitDocSet getDocSetBits(Query q) throws IOException { + DocSet answer = getDocSet(q); + if (answer instanceof BitDocSet) { + return (BitDocSet)answer; + } + + FixedBitSet bs = new FixedBitSet(maxDoc()); + DocIterator iter = answer.iterator(); + while (iter.hasNext()) { + bs.set(iter.nextDoc()); + } + + BitDocSet answerBits = new BitDocSet(bs , answer.size()); + if (filterCache != null) { + filterCache.put(q, answerBits); + } + return answerBits; + } + /** * Returns the set of document ids matching a query. * This method is cache-aware and attempts to retrieve the answer from the cache if possible. diff --git a/solr/core/src/java/org/apache/solr/search/facet/BlockJoin.java b/solr/core/src/java/org/apache/solr/search/facet/BlockJoin.java new file mode 100644 index 00000000000..e4f75f4e554 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/BlockJoin.java @@ -0,0 +1,77 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.solr.search.BitDocSet; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocSet; +import org.apache.solr.search.DocSetCollector; +import org.apache.solr.search.QueryContext; + +/** @lucene.experimental */ +public class BlockJoin { + + /** acceptDocs will normally be used to avoid deleted documents from being generated as part of the answer DocSet (just use *:*) + * although it can be used to further constrain the generated documents. + */ + public static DocSet toChildren(DocSet parentInput, BitDocSet parentList, DocSet acceptDocs, QueryContext qcontext) throws IOException { + FixedBitSet parentBits = parentList.getBits(); + DocSetCollector collector = new DocSetCollector(qcontext.searcher().maxDoc()>>6, qcontext.searcher().maxDoc()); + DocIterator iter = parentInput.iterator(); + while (iter.hasNext()) { + int parentDoc = iter.nextDoc(); + if (!parentList.exists(parentDoc) || parentDoc == 0) { // test for parentDoc==0 here to avoid passing -1 to prevSetBit later on + // not a parent, or parent has no children + continue; + } + int prevParent = parentBits.prevSetBit(parentDoc - 1); + for (int childDoc = prevParent+1; childDoc>6, qcontext.searcher().maxDoc()); + DocIterator iter = childInput.iterator(); + int currentParent = -1; + while (iter.hasNext()) { + int childDoc = iter.nextDoc(); // TODO: skipping + if (childDoc <= currentParent) { // use <= since we also allow parents in the input + // we already visited this parent + continue; + } + currentParent = parentBits.nextSetBit(childDoc); + if (currentParent != DocIdSetIterator.NO_MORE_DOCS) { + // only collect the parent the first time we skip to it + collector.collect( currentParent ); + } + } + return collector.getDocSet(); + } + +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java index 0e25947133d..909a5ee6920 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java @@ -59,3 +59,4 @@ class FacetQueryProcessor extends FacetProcessor { } + diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java index 0eec43100b9..424826d6892 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java @@ -31,6 +31,7 @@ import java.util.Map; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.FacetParams; @@ -41,6 +42,7 @@ import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.BitDocSet; import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocSet; import org.apache.solr.search.FunctionQParser; @@ -54,8 +56,17 @@ import org.apache.solr.search.SyntaxError; public abstract class FacetRequest { protected Map facetStats; // per-bucket statistics protected Map subFacets; // list of facets - protected List excludeTags; + protected List filters; protected boolean processEmpty; + protected Domain domain; + + // domain changes + public static class Domain { + public List excludeTags; + public boolean toParent; + public boolean toChildren; + public String parents; + } public FacetRequest() { facetStats = new LinkedHashMap<>(); @@ -140,7 +151,42 @@ class FacetProcessor { } protected void handleDomainChanges() throws IOException { - if (freq.excludeTags == null || freq.excludeTags.size() == 0) { + if (freq.domain == null) return; + handleFilterExclusions(); + handleBlockJoin(); + } + + private void handleBlockJoin() throws IOException { + if (!(freq.domain.toChildren || freq.domain.toParent)) return; + + // TODO: avoid query parsing per-bucket somehow... + String parentStr = freq.domain.parents; + Query parentQuery; + try { + QParser parser = QParser.getParser(parentStr, null, fcontext.req); + parentQuery = parser.getQuery(); + } catch (SyntaxError err) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing block join parent specification: " + parentStr); + } + + BitDocSet parents = fcontext.searcher.getDocSetBits(parentQuery); + DocSet input = fcontext.base; + DocSet result; + + if (freq.domain.toChildren) { + DocSet filt = fcontext.searcher.getDocSetBits( new MatchAllDocsQuery() ); + result = BlockJoin.toChildren(input, parents, filt, fcontext.qcontext); + } else { + result = BlockJoin.toParents(input, parents, fcontext.qcontext); + } + + fcontext.base = result; + } + + private void handleFilterExclusions() throws IOException { + List excludeTags = freq.domain.excludeTags; + + if (excludeTags == null || excludeTags.size() == 0) { return; } @@ -153,7 +199,7 @@ class FacetProcessor { } IdentityHashMap excludeSet = new IdentityHashMap<>(); - for (String excludeTag : freq.excludeTags) { + for (String excludeTag : excludeTags) { Object olst = tagMap.get(excludeTag); // tagMap has entries of List>, but subject to change in the future if (!(olst instanceof Collection)) continue; @@ -487,7 +533,7 @@ abstract class FacetParser { } else if ("query".equals(type)) { return parseQueryFacet(key, args); } else if ("range".equals(type)) { - return parseRangeFacet(key, args); + return parseRangeFacet(key, args); } return parseStat(key, type, args); @@ -528,10 +574,41 @@ abstract class FacetParser { } + private FacetRequest.Domain getDomain() { + if (facet.domain == null) { + facet.domain = new FacetRequest.Domain(); + } + return facet.domain; + } + protected void parseCommonParams(Object o) { if (o instanceof Map) { Map m = (Map)o; - facet.excludeTags = getStringList(m, "excludeTags"); + List excludeTags = getStringList(m, "excludeTags"); + if (excludeTags != null) { + getDomain().excludeTags = excludeTags; + } + + Map domainMap = (Map) m.get("domain"); + if (domainMap != null) { + excludeTags = getStringList(m, "excludeTags"); + if (excludeTags != null) { + getDomain().excludeTags = excludeTags; + } + + String blockParent = (String)domainMap.get("blockParent"); + String blockChildren = (String)domainMap.get("blockChildren"); + + if (blockParent != null) { + getDomain().toParent = true; + getDomain().parents = blockParent; + } else if (blockChildren != null) { + getDomain().toChildren = true; + getDomain().parents = blockChildren; + } + + } + } } @@ -696,6 +773,34 @@ class FacetQueryParser extends FacetParser { } } +/*** not a separate type of parser for now... +class FacetBlockParentParser extends FacetParser { + public FacetBlockParentParser(FacetParser parent, String key) { + super(parent, key); + facet = new FacetBlockParent(); + } + + @Override + public FacetBlockParent parse(Object arg) throws SyntaxError { + parseCommonParams(arg); + + if (arg instanceof String) { + // just the field name... + facet.parents = (String)arg; + + } else if (arg instanceof Map) { + Map m = (Map) arg; + facet.parents = getString(m, "parents", null); + + parseSubs( m.get("facet") ); + } + + return facet; + } +} +***/ + + class FacetFieldParser extends FacetParser { public FacetFieldParser(FacetParser parent, String key) { super(parent, key); diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java index 9df7453a779..3e6d67d5a5d 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java @@ -1072,6 +1072,82 @@ public class TestJsonFacets extends SolrTestCaseHS { ); } + + + @Test + public void testBlockJoin() throws Exception { + doBlockJoin(Client.localClient()); + } + + public void doBlockJoin(Client client) throws Exception { + ModifiableSolrParams p = params("rows","0"); + + client.deleteByQuery("*:*", null); + + SolrInputDocument parent; + parent = sdoc("id", "1", "type_s","book", "book_s","A", "v_t","q"); + client.add(parent, null); + + parent = sdoc("id", "2", "type_s","book", "book_s","B", "v_t","q w"); + parent.addChildDocument( sdoc("id","2.1", "type_s","page", "page_s","a", "v_t","x y z") ); + parent.addChildDocument( sdoc("id","2.2", "type_s","page", "page_s","b", "v_t","x y ") ); + parent.addChildDocument( sdoc("id","2.3", "type_s","page", "page_s","c", "v_t"," y z" ) ); + client.add(parent, null); + + parent = sdoc("id", "3", "type_s","book", "book_s","C", "v_t","q w e"); + parent.addChildDocument( sdoc("id","3.1", "type_s","page", "page_s","d", "v_t","x ") ); + parent.addChildDocument( sdoc("id","3.2", "type_s","page", "page_s","e", "v_t"," y ") ); + parent.addChildDocument( sdoc("id","3.3", "type_s","page", "page_s","f", "v_t"," z") ); + client.add(parent, null); + + parent = sdoc("id", "4", "type_s","book", "book_s","D", "v_t","e"); + client.add(parent, null); + + client.commit(); + + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{ " + + "pages:{ type:query, domain:{blockChildren:'type_s:book'} , facet:{ x:{field:v_t} } }" + + ",pages2:{type:terms, field:v_t, domain:{blockChildren:'type_s:book'} }" + + ",books:{ type:query, domain:{blockParent:'type_s:book'} , facet:{ x:{field:v_t} } }" + + ",books2:{type:terms, field:v_t, domain:{blockParent:'type_s:book'} }" + + ",pageof3:{ type:query, q:'id:3', facet : { x : { type:terms, field:page_s, domain:{blockChildren:'type_s:book'}}} }" + + ",bookof22:{ type:query, q:'id:2.2', facet : { x : { type:terms, field:book_s, domain:{blockParent:'type_s:book'}}} }" + + ",missing_blockParent:{ type:query, domain:{blockParent:'type_s:does_not_exist'} }" + + ",missing_blockChildren:{ type:query, domain:{blockChildren:'type_s:does_not_exist'} }" + + "}" + ) + , "facets=={ count:10" + + ", pages:{count:6 , x:{buckets:[ {val:y,count:4},{val:x,count:3},{val:z,count:3} ]} }" + + ", pages2:{ buckets:[ {val:y,count:4},{val:x,count:3},{val:z,count:3} ] }" + + ", books:{count:4 , x:{buckets:[ {val:q,count:3},{val:e,count:2},{val:w,count:2} ]} }" + + ", books2:{ buckets:[ {val:q,count:3},{val:e,count:2},{val:w,count:2} ] }" + + ", pageof3:{count:1 , x:{buckets:[ {val:d,count:1},{val:e,count:1},{val:f,count:1} ]} }" + + ", bookof22:{count:1 , x:{buckets:[ {val:B,count:1} ]} }" + + ", missing_blockParent:{count:0}" + + ", missing_blockChildren:{count:0}" + + "}" + ); + + // no matches in base query + client.testJQ(params("q", "no_match_s:NO_MATCHES" + , "json.facet", "{ processEmpty:true," + + "pages:{ type:query, domain:{blockChildren:'type_s:book'} }" + + ",books:{ type:query, domain:{blockParent:'type_s:book'} }" + + "}" + ) + , "facets=={ count:0" + + ", pages:{count:0}" + + ", books:{count:0}" + + "}" + ); + + + } + + + + public void XtestPercentiles() { AVLTreeDigest catA = new AVLTreeDigest(100); catA.add(4);