SOLR-7676: nested docs facet domain switching

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1685340 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2015-06-13 22:32:48 +00:00
parent b202035b93
commit 38292f791e
7 changed files with 295 additions and 8 deletions

View File

@ -116,6 +116,13 @@ New Features
* SOLR-7458: Expose HDFS Block Locality Metrics via JMX (Mike Drob via Mark Miller)
* SOLR-7676: Faceting on nested objects / Block-join faceting with the new JSON Facet API.
Example: Assuming books with nested pages and an input domain of pages, the following
will switch the domain to books before faceting on the author field:
authors:{ type:terms, field:author, domain:{toParent:"type:book"} }
(yonik)
Bug Fixes
----------------------

View File

@ -26,8 +26,10 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrRequestInfo;
/**
* Bridge between old style context and a real class
/*
* Bridge between old style context and a real class.
* This is currently slightly more heavy weight than necessary because of the need to inherit from IdentityHashMap rather than
* instantiate it on demand (and the need to put "searcher" in the map)
* @lucene.experimental
*/
public class QueryContext extends IdentityHashMap implements Closeable {
@ -45,7 +47,7 @@ public class QueryContext extends IdentityHashMap implements Closeable {
public QueryContext(IndexSearcher searcher) {
this.searcher = searcher instanceof SolrIndexSearcher ? (SolrIndexSearcher)searcher : null;
indexSearcher = searcher;
this.put("searcher", searcher); // see ValueSource.newContext()
this.put("searcher", searcher); // see ValueSource.newContext() // TODO: move check to "get"?
}

View File

@ -873,6 +873,25 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
getDocSet(query);
}
public BitDocSet getDocSetBits(Query q) throws IOException {
DocSet answer = getDocSet(q);
if (answer instanceof BitDocSet) {
return (BitDocSet)answer;
}
FixedBitSet bs = new FixedBitSet(maxDoc());
DocIterator iter = answer.iterator();
while (iter.hasNext()) {
bs.set(iter.nextDoc());
}
BitDocSet answerBits = new BitDocSet(bs , answer.size());
if (filterCache != null) {
filterCache.put(q, answerBits);
}
return answerBits;
}
/**
* Returns the set of document ids matching a query.
* This method is cache-aware and attempts to retrieve the answer from the cache if possible.

View File

@ -0,0 +1,77 @@
package org.apache.solr.search.facet;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSetCollector;
import org.apache.solr.search.QueryContext;
/** @lucene.experimental */
public class BlockJoin {
/** acceptDocs will normally be used to avoid deleted documents from being generated as part of the answer DocSet (just use *:*)
* although it can be used to further constrain the generated documents.
*/
public static DocSet toChildren(DocSet parentInput, BitDocSet parentList, DocSet acceptDocs, QueryContext qcontext) throws IOException {
FixedBitSet parentBits = parentList.getBits();
DocSetCollector collector = new DocSetCollector(qcontext.searcher().maxDoc()>>6, qcontext.searcher().maxDoc());
DocIterator iter = parentInput.iterator();
while (iter.hasNext()) {
int parentDoc = iter.nextDoc();
if (!parentList.exists(parentDoc) || parentDoc == 0) { // test for parentDoc==0 here to avoid passing -1 to prevSetBit later on
// not a parent, or parent has no children
continue;
}
int prevParent = parentBits.prevSetBit(parentDoc - 1);
for (int childDoc = prevParent+1; childDoc<parentDoc; childDoc++) {
if (acceptDocs != null && !acceptDocs.exists(childDoc)) continue; // only select live docs
collector.collect(childDoc);
}
}
return collector.getDocSet();
}
/** childInput may also contain parents (i.e. a parent or below will all roll up to that parent) */
public static DocSet toParents(DocSet childInput, BitDocSet parentList, QueryContext qcontext) throws IOException {
FixedBitSet parentBits = parentList.getBits();
DocSetCollector collector = new DocSetCollector(qcontext.searcher().maxDoc()>>6, qcontext.searcher().maxDoc());
DocIterator iter = childInput.iterator();
int currentParent = -1;
while (iter.hasNext()) {
int childDoc = iter.nextDoc(); // TODO: skipping
if (childDoc <= currentParent) { // use <= since we also allow parents in the input
// we already visited this parent
continue;
}
currentParent = parentBits.nextSetBit(childDoc);
if (currentParent != DocIdSetIterator.NO_MORE_DOCS) {
// only collect the parent the first time we skip to it
collector.collect( currentParent );
}
}
return collector.getDocSet();
}
}

View File

@ -59,3 +59,4 @@ class FacetQueryProcessor extends FacetProcessor<FacetQuery> {
}

View File

@ -31,6 +31,7 @@ import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.FacetParams;
@ -41,6 +42,7 @@ import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.FunctionQParser;
@ -54,8 +56,17 @@ import org.apache.solr.search.SyntaxError;
public abstract class FacetRequest {
protected Map<String,AggValueSource> facetStats; // per-bucket statistics
protected Map<String,FacetRequest> subFacets; // list of facets
protected List<String> excludeTags;
protected List<String> filters;
protected boolean processEmpty;
protected Domain domain;
// domain changes
public static class Domain {
public List<String> excludeTags;
public boolean toParent;
public boolean toChildren;
public String parents;
}
public FacetRequest() {
facetStats = new LinkedHashMap<>();
@ -140,7 +151,42 @@ class FacetProcessor<FacetRequestT extends FacetRequest> {
}
protected void handleDomainChanges() throws IOException {
if (freq.excludeTags == null || freq.excludeTags.size() == 0) {
if (freq.domain == null) return;
handleFilterExclusions();
handleBlockJoin();
}
private void handleBlockJoin() throws IOException {
if (!(freq.domain.toChildren || freq.domain.toParent)) return;
// TODO: avoid query parsing per-bucket somehow...
String parentStr = freq.domain.parents;
Query parentQuery;
try {
QParser parser = QParser.getParser(parentStr, null, fcontext.req);
parentQuery = parser.getQuery();
} catch (SyntaxError err) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing block join parent specification: " + parentStr);
}
BitDocSet parents = fcontext.searcher.getDocSetBits(parentQuery);
DocSet input = fcontext.base;
DocSet result;
if (freq.domain.toChildren) {
DocSet filt = fcontext.searcher.getDocSetBits( new MatchAllDocsQuery() );
result = BlockJoin.toChildren(input, parents, filt, fcontext.qcontext);
} else {
result = BlockJoin.toParents(input, parents, fcontext.qcontext);
}
fcontext.base = result;
}
private void handleFilterExclusions() throws IOException {
List<String> excludeTags = freq.domain.excludeTags;
if (excludeTags == null || excludeTags.size() == 0) {
return;
}
@ -153,7 +199,7 @@ class FacetProcessor<FacetRequestT extends FacetRequest> {
}
IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<>();
for (String excludeTag : freq.excludeTags) {
for (String excludeTag : excludeTags) {
Object olst = tagMap.get(excludeTag);
// tagMap has entries of List<String,List<QParser>>, but subject to change in the future
if (!(olst instanceof Collection)) continue;
@ -487,7 +533,7 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
} else if ("query".equals(type)) {
return parseQueryFacet(key, args);
} else if ("range".equals(type)) {
return parseRangeFacet(key, args);
return parseRangeFacet(key, args);
}
return parseStat(key, type, args);
@ -528,10 +574,41 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
}
private FacetRequest.Domain getDomain() {
if (facet.domain == null) {
facet.domain = new FacetRequest.Domain();
}
return facet.domain;
}
protected void parseCommonParams(Object o) {
if (o instanceof Map) {
Map<String,Object> m = (Map<String,Object>)o;
facet.excludeTags = getStringList(m, "excludeTags");
List<String> excludeTags = getStringList(m, "excludeTags");
if (excludeTags != null) {
getDomain().excludeTags = excludeTags;
}
Map<String,Object> domainMap = (Map<String,Object>) m.get("domain");
if (domainMap != null) {
excludeTags = getStringList(m, "excludeTags");
if (excludeTags != null) {
getDomain().excludeTags = excludeTags;
}
String blockParent = (String)domainMap.get("blockParent");
String blockChildren = (String)domainMap.get("blockChildren");
if (blockParent != null) {
getDomain().toParent = true;
getDomain().parents = blockParent;
} else if (blockChildren != null) {
getDomain().toChildren = true;
getDomain().parents = blockChildren;
}
}
}
}
@ -696,6 +773,34 @@ class FacetQueryParser extends FacetParser<FacetQuery> {
}
}
/*** not a separate type of parser for now...
class FacetBlockParentParser extends FacetParser<FacetBlockParent> {
public FacetBlockParentParser(FacetParser parent, String key) {
super(parent, key);
facet = new FacetBlockParent();
}
@Override
public FacetBlockParent parse(Object arg) throws SyntaxError {
parseCommonParams(arg);
if (arg instanceof String) {
// just the field name...
facet.parents = (String)arg;
} else if (arg instanceof Map) {
Map<String, Object> m = (Map<String, Object>) arg;
facet.parents = getString(m, "parents", null);
parseSubs( m.get("facet") );
}
return facet;
}
}
***/
class FacetFieldParser extends FacetParser<FacetField> {
public FacetFieldParser(FacetParser parent, String key) {
super(parent, key);

View File

@ -1072,6 +1072,82 @@ public class TestJsonFacets extends SolrTestCaseHS {
);
}
@Test
public void testBlockJoin() throws Exception {
doBlockJoin(Client.localClient());
}
public void doBlockJoin(Client client) throws Exception {
ModifiableSolrParams p = params("rows","0");
client.deleteByQuery("*:*", null);
SolrInputDocument parent;
parent = sdoc("id", "1", "type_s","book", "book_s","A", "v_t","q");
client.add(parent, null);
parent = sdoc("id", "2", "type_s","book", "book_s","B", "v_t","q w");
parent.addChildDocument( sdoc("id","2.1", "type_s","page", "page_s","a", "v_t","x y z") );
parent.addChildDocument( sdoc("id","2.2", "type_s","page", "page_s","b", "v_t","x y ") );
parent.addChildDocument( sdoc("id","2.3", "type_s","page", "page_s","c", "v_t"," y z" ) );
client.add(parent, null);
parent = sdoc("id", "3", "type_s","book", "book_s","C", "v_t","q w e");
parent.addChildDocument( sdoc("id","3.1", "type_s","page", "page_s","d", "v_t","x ") );
parent.addChildDocument( sdoc("id","3.2", "type_s","page", "page_s","e", "v_t"," y ") );
parent.addChildDocument( sdoc("id","3.3", "type_s","page", "page_s","f", "v_t"," z") );
client.add(parent, null);
parent = sdoc("id", "4", "type_s","book", "book_s","D", "v_t","e");
client.add(parent, null);
client.commit();
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{ " +
"pages:{ type:query, domain:{blockChildren:'type_s:book'} , facet:{ x:{field:v_t} } }" +
",pages2:{type:terms, field:v_t, domain:{blockChildren:'type_s:book'} }" +
",books:{ type:query, domain:{blockParent:'type_s:book'} , facet:{ x:{field:v_t} } }" +
",books2:{type:terms, field:v_t, domain:{blockParent:'type_s:book'} }" +
",pageof3:{ type:query, q:'id:3', facet : { x : { type:terms, field:page_s, domain:{blockChildren:'type_s:book'}}} }" +
",bookof22:{ type:query, q:'id:2.2', facet : { x : { type:terms, field:book_s, domain:{blockParent:'type_s:book'}}} }" +
",missing_blockParent:{ type:query, domain:{blockParent:'type_s:does_not_exist'} }" +
",missing_blockChildren:{ type:query, domain:{blockChildren:'type_s:does_not_exist'} }" +
"}"
)
, "facets=={ count:10" +
", pages:{count:6 , x:{buckets:[ {val:y,count:4},{val:x,count:3},{val:z,count:3} ]} }" +
", pages2:{ buckets:[ {val:y,count:4},{val:x,count:3},{val:z,count:3} ] }" +
", books:{count:4 , x:{buckets:[ {val:q,count:3},{val:e,count:2},{val:w,count:2} ]} }" +
", books2:{ buckets:[ {val:q,count:3},{val:e,count:2},{val:w,count:2} ] }" +
", pageof3:{count:1 , x:{buckets:[ {val:d,count:1},{val:e,count:1},{val:f,count:1} ]} }" +
", bookof22:{count:1 , x:{buckets:[ {val:B,count:1} ]} }" +
", missing_blockParent:{count:0}" +
", missing_blockChildren:{count:0}" +
"}"
);
// no matches in base query
client.testJQ(params("q", "no_match_s:NO_MATCHES"
, "json.facet", "{ processEmpty:true," +
"pages:{ type:query, domain:{blockChildren:'type_s:book'} }" +
",books:{ type:query, domain:{blockParent:'type_s:book'} }" +
"}"
)
, "facets=={ count:0" +
", pages:{count:0}" +
", books:{count:0}" +
"}"
);
}
public void XtestPercentiles() {
AVLTreeDigest catA = new AVLTreeDigest(100);
catA.add(4);