mirror of https://github.com/apache/lucene.git
SOLR-7676: nested docs facet domain switching
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1685340 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b202035b93
commit
38292f791e
|
@ -116,6 +116,13 @@ New Features
|
|||
|
||||
* SOLR-7458: Expose HDFS Block Locality Metrics via JMX (Mike Drob via Mark Miller)
|
||||
|
||||
* SOLR-7676: Faceting on nested objects / Block-join faceting with the new JSON Facet API.
|
||||
Example: Assuming books with nested pages and an input domain of pages, the following
|
||||
will switch the domain to books before faceting on the author field:
|
||||
authors:{ type:terms, field:author, domain:{toParent:"type:book"} }
|
||||
(yonik)
|
||||
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -26,8 +26,10 @@ import org.apache.solr.common.SolrException;
|
|||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.request.SolrRequestInfo;
|
||||
|
||||
/**
|
||||
* Bridge between old style context and a real class
|
||||
/*
|
||||
* Bridge between old style context and a real class.
|
||||
* This is currently slightly more heavy weight than necessary because of the need to inherit from IdentityHashMap rather than
|
||||
* instantiate it on demand (and the need to put "searcher" in the map)
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class QueryContext extends IdentityHashMap implements Closeable {
|
||||
|
@ -45,7 +47,7 @@ public class QueryContext extends IdentityHashMap implements Closeable {
|
|||
public QueryContext(IndexSearcher searcher) {
|
||||
this.searcher = searcher instanceof SolrIndexSearcher ? (SolrIndexSearcher)searcher : null;
|
||||
indexSearcher = searcher;
|
||||
this.put("searcher", searcher); // see ValueSource.newContext()
|
||||
this.put("searcher", searcher); // see ValueSource.newContext() // TODO: move check to "get"?
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -873,6 +873,25 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
|
|||
getDocSet(query);
|
||||
}
|
||||
|
||||
public BitDocSet getDocSetBits(Query q) throws IOException {
|
||||
DocSet answer = getDocSet(q);
|
||||
if (answer instanceof BitDocSet) {
|
||||
return (BitDocSet)answer;
|
||||
}
|
||||
|
||||
FixedBitSet bs = new FixedBitSet(maxDoc());
|
||||
DocIterator iter = answer.iterator();
|
||||
while (iter.hasNext()) {
|
||||
bs.set(iter.nextDoc());
|
||||
}
|
||||
|
||||
BitDocSet answerBits = new BitDocSet(bs , answer.size());
|
||||
if (filterCache != null) {
|
||||
filterCache.put(q, answerBits);
|
||||
}
|
||||
return answerBits;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the set of document ids matching a query.
|
||||
* This method is cache-aware and attempts to retrieve the answer from the cache if possible.
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
package org.apache.solr.search.facet;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.solr.search.BitDocSet;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.DocSetCollector;
|
||||
import org.apache.solr.search.QueryContext;
|
||||
|
||||
/** @lucene.experimental */
|
||||
public class BlockJoin {
|
||||
|
||||
/** acceptDocs will normally be used to avoid deleted documents from being generated as part of the answer DocSet (just use *:*)
|
||||
* although it can be used to further constrain the generated documents.
|
||||
*/
|
||||
public static DocSet toChildren(DocSet parentInput, BitDocSet parentList, DocSet acceptDocs, QueryContext qcontext) throws IOException {
|
||||
FixedBitSet parentBits = parentList.getBits();
|
||||
DocSetCollector collector = new DocSetCollector(qcontext.searcher().maxDoc()>>6, qcontext.searcher().maxDoc());
|
||||
DocIterator iter = parentInput.iterator();
|
||||
while (iter.hasNext()) {
|
||||
int parentDoc = iter.nextDoc();
|
||||
if (!parentList.exists(parentDoc) || parentDoc == 0) { // test for parentDoc==0 here to avoid passing -1 to prevSetBit later on
|
||||
// not a parent, or parent has no children
|
||||
continue;
|
||||
}
|
||||
int prevParent = parentBits.prevSetBit(parentDoc - 1);
|
||||
for (int childDoc = prevParent+1; childDoc<parentDoc; childDoc++) {
|
||||
if (acceptDocs != null && !acceptDocs.exists(childDoc)) continue; // only select live docs
|
||||
collector.collect(childDoc);
|
||||
}
|
||||
}
|
||||
return collector.getDocSet();
|
||||
}
|
||||
|
||||
/** childInput may also contain parents (i.e. a parent or below will all roll up to that parent) */
|
||||
public static DocSet toParents(DocSet childInput, BitDocSet parentList, QueryContext qcontext) throws IOException {
|
||||
FixedBitSet parentBits = parentList.getBits();
|
||||
DocSetCollector collector = new DocSetCollector(qcontext.searcher().maxDoc()>>6, qcontext.searcher().maxDoc());
|
||||
DocIterator iter = childInput.iterator();
|
||||
int currentParent = -1;
|
||||
while (iter.hasNext()) {
|
||||
int childDoc = iter.nextDoc(); // TODO: skipping
|
||||
if (childDoc <= currentParent) { // use <= since we also allow parents in the input
|
||||
// we already visited this parent
|
||||
continue;
|
||||
}
|
||||
currentParent = parentBits.nextSetBit(childDoc);
|
||||
if (currentParent != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
// only collect the parent the first time we skip to it
|
||||
collector.collect( currentParent );
|
||||
}
|
||||
}
|
||||
return collector.getDocSet();
|
||||
}
|
||||
|
||||
}
|
|
@ -59,3 +59,4 @@ class FacetQueryProcessor extends FacetProcessor<FacetQuery> {
|
|||
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@ import java.util.Map;
|
|||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
|
@ -41,6 +42,7 @@ import org.apache.solr.request.SolrQueryRequest;
|
|||
import org.apache.solr.request.SolrRequestInfo;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.BitDocSet;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.FunctionQParser;
|
||||
|
@ -54,8 +56,17 @@ import org.apache.solr.search.SyntaxError;
|
|||
public abstract class FacetRequest {
|
||||
protected Map<String,AggValueSource> facetStats; // per-bucket statistics
|
||||
protected Map<String,FacetRequest> subFacets; // list of facets
|
||||
protected List<String> excludeTags;
|
||||
protected List<String> filters;
|
||||
protected boolean processEmpty;
|
||||
protected Domain domain;
|
||||
|
||||
// domain changes
|
||||
public static class Domain {
|
||||
public List<String> excludeTags;
|
||||
public boolean toParent;
|
||||
public boolean toChildren;
|
||||
public String parents;
|
||||
}
|
||||
|
||||
public FacetRequest() {
|
||||
facetStats = new LinkedHashMap<>();
|
||||
|
@ -140,7 +151,42 @@ class FacetProcessor<FacetRequestT extends FacetRequest> {
|
|||
}
|
||||
|
||||
protected void handleDomainChanges() throws IOException {
|
||||
if (freq.excludeTags == null || freq.excludeTags.size() == 0) {
|
||||
if (freq.domain == null) return;
|
||||
handleFilterExclusions();
|
||||
handleBlockJoin();
|
||||
}
|
||||
|
||||
private void handleBlockJoin() throws IOException {
|
||||
if (!(freq.domain.toChildren || freq.domain.toParent)) return;
|
||||
|
||||
// TODO: avoid query parsing per-bucket somehow...
|
||||
String parentStr = freq.domain.parents;
|
||||
Query parentQuery;
|
||||
try {
|
||||
QParser parser = QParser.getParser(parentStr, null, fcontext.req);
|
||||
parentQuery = parser.getQuery();
|
||||
} catch (SyntaxError err) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing block join parent specification: " + parentStr);
|
||||
}
|
||||
|
||||
BitDocSet parents = fcontext.searcher.getDocSetBits(parentQuery);
|
||||
DocSet input = fcontext.base;
|
||||
DocSet result;
|
||||
|
||||
if (freq.domain.toChildren) {
|
||||
DocSet filt = fcontext.searcher.getDocSetBits( new MatchAllDocsQuery() );
|
||||
result = BlockJoin.toChildren(input, parents, filt, fcontext.qcontext);
|
||||
} else {
|
||||
result = BlockJoin.toParents(input, parents, fcontext.qcontext);
|
||||
}
|
||||
|
||||
fcontext.base = result;
|
||||
}
|
||||
|
||||
private void handleFilterExclusions() throws IOException {
|
||||
List<String> excludeTags = freq.domain.excludeTags;
|
||||
|
||||
if (excludeTags == null || excludeTags.size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -153,7 +199,7 @@ class FacetProcessor<FacetRequestT extends FacetRequest> {
|
|||
}
|
||||
|
||||
IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<>();
|
||||
for (String excludeTag : freq.excludeTags) {
|
||||
for (String excludeTag : excludeTags) {
|
||||
Object olst = tagMap.get(excludeTag);
|
||||
// tagMap has entries of List<String,List<QParser>>, but subject to change in the future
|
||||
if (!(olst instanceof Collection)) continue;
|
||||
|
@ -487,7 +533,7 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
|
|||
} else if ("query".equals(type)) {
|
||||
return parseQueryFacet(key, args);
|
||||
} else if ("range".equals(type)) {
|
||||
return parseRangeFacet(key, args);
|
||||
return parseRangeFacet(key, args);
|
||||
}
|
||||
|
||||
return parseStat(key, type, args);
|
||||
|
@ -528,10 +574,41 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
|
|||
}
|
||||
|
||||
|
||||
private FacetRequest.Domain getDomain() {
|
||||
if (facet.domain == null) {
|
||||
facet.domain = new FacetRequest.Domain();
|
||||
}
|
||||
return facet.domain;
|
||||
}
|
||||
|
||||
protected void parseCommonParams(Object o) {
|
||||
if (o instanceof Map) {
|
||||
Map<String,Object> m = (Map<String,Object>)o;
|
||||
facet.excludeTags = getStringList(m, "excludeTags");
|
||||
List<String> excludeTags = getStringList(m, "excludeTags");
|
||||
if (excludeTags != null) {
|
||||
getDomain().excludeTags = excludeTags;
|
||||
}
|
||||
|
||||
Map<String,Object> domainMap = (Map<String,Object>) m.get("domain");
|
||||
if (domainMap != null) {
|
||||
excludeTags = getStringList(m, "excludeTags");
|
||||
if (excludeTags != null) {
|
||||
getDomain().excludeTags = excludeTags;
|
||||
}
|
||||
|
||||
String blockParent = (String)domainMap.get("blockParent");
|
||||
String blockChildren = (String)domainMap.get("blockChildren");
|
||||
|
||||
if (blockParent != null) {
|
||||
getDomain().toParent = true;
|
||||
getDomain().parents = blockParent;
|
||||
} else if (blockChildren != null) {
|
||||
getDomain().toChildren = true;
|
||||
getDomain().parents = blockChildren;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -696,6 +773,34 @@ class FacetQueryParser extends FacetParser<FacetQuery> {
|
|||
}
|
||||
}
|
||||
|
||||
/*** not a separate type of parser for now...
|
||||
class FacetBlockParentParser extends FacetParser<FacetBlockParent> {
|
||||
public FacetBlockParentParser(FacetParser parent, String key) {
|
||||
super(parent, key);
|
||||
facet = new FacetBlockParent();
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetBlockParent parse(Object arg) throws SyntaxError {
|
||||
parseCommonParams(arg);
|
||||
|
||||
if (arg instanceof String) {
|
||||
// just the field name...
|
||||
facet.parents = (String)arg;
|
||||
|
||||
} else if (arg instanceof Map) {
|
||||
Map<String, Object> m = (Map<String, Object>) arg;
|
||||
facet.parents = getString(m, "parents", null);
|
||||
|
||||
parseSubs( m.get("facet") );
|
||||
}
|
||||
|
||||
return facet;
|
||||
}
|
||||
}
|
||||
***/
|
||||
|
||||
|
||||
class FacetFieldParser extends FacetParser<FacetField> {
|
||||
public FacetFieldParser(FacetParser parent, String key) {
|
||||
super(parent, key);
|
||||
|
|
|
@ -1072,6 +1072,82 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test
|
||||
public void testBlockJoin() throws Exception {
|
||||
doBlockJoin(Client.localClient());
|
||||
}
|
||||
|
||||
public void doBlockJoin(Client client) throws Exception {
|
||||
ModifiableSolrParams p = params("rows","0");
|
||||
|
||||
client.deleteByQuery("*:*", null);
|
||||
|
||||
SolrInputDocument parent;
|
||||
parent = sdoc("id", "1", "type_s","book", "book_s","A", "v_t","q");
|
||||
client.add(parent, null);
|
||||
|
||||
parent = sdoc("id", "2", "type_s","book", "book_s","B", "v_t","q w");
|
||||
parent.addChildDocument( sdoc("id","2.1", "type_s","page", "page_s","a", "v_t","x y z") );
|
||||
parent.addChildDocument( sdoc("id","2.2", "type_s","page", "page_s","b", "v_t","x y ") );
|
||||
parent.addChildDocument( sdoc("id","2.3", "type_s","page", "page_s","c", "v_t"," y z" ) );
|
||||
client.add(parent, null);
|
||||
|
||||
parent = sdoc("id", "3", "type_s","book", "book_s","C", "v_t","q w e");
|
||||
parent.addChildDocument( sdoc("id","3.1", "type_s","page", "page_s","d", "v_t","x ") );
|
||||
parent.addChildDocument( sdoc("id","3.2", "type_s","page", "page_s","e", "v_t"," y ") );
|
||||
parent.addChildDocument( sdoc("id","3.3", "type_s","page", "page_s","f", "v_t"," z") );
|
||||
client.add(parent, null);
|
||||
|
||||
parent = sdoc("id", "4", "type_s","book", "book_s","D", "v_t","e");
|
||||
client.add(parent, null);
|
||||
|
||||
client.commit();
|
||||
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{ " +
|
||||
"pages:{ type:query, domain:{blockChildren:'type_s:book'} , facet:{ x:{field:v_t} } }" +
|
||||
",pages2:{type:terms, field:v_t, domain:{blockChildren:'type_s:book'} }" +
|
||||
",books:{ type:query, domain:{blockParent:'type_s:book'} , facet:{ x:{field:v_t} } }" +
|
||||
",books2:{type:terms, field:v_t, domain:{blockParent:'type_s:book'} }" +
|
||||
",pageof3:{ type:query, q:'id:3', facet : { x : { type:terms, field:page_s, domain:{blockChildren:'type_s:book'}}} }" +
|
||||
",bookof22:{ type:query, q:'id:2.2', facet : { x : { type:terms, field:book_s, domain:{blockParent:'type_s:book'}}} }" +
|
||||
",missing_blockParent:{ type:query, domain:{blockParent:'type_s:does_not_exist'} }" +
|
||||
",missing_blockChildren:{ type:query, domain:{blockChildren:'type_s:does_not_exist'} }" +
|
||||
"}"
|
||||
)
|
||||
, "facets=={ count:10" +
|
||||
", pages:{count:6 , x:{buckets:[ {val:y,count:4},{val:x,count:3},{val:z,count:3} ]} }" +
|
||||
", pages2:{ buckets:[ {val:y,count:4},{val:x,count:3},{val:z,count:3} ] }" +
|
||||
", books:{count:4 , x:{buckets:[ {val:q,count:3},{val:e,count:2},{val:w,count:2} ]} }" +
|
||||
", books2:{ buckets:[ {val:q,count:3},{val:e,count:2},{val:w,count:2} ] }" +
|
||||
", pageof3:{count:1 , x:{buckets:[ {val:d,count:1},{val:e,count:1},{val:f,count:1} ]} }" +
|
||||
", bookof22:{count:1 , x:{buckets:[ {val:B,count:1} ]} }" +
|
||||
", missing_blockParent:{count:0}" +
|
||||
", missing_blockChildren:{count:0}" +
|
||||
"}"
|
||||
);
|
||||
|
||||
// no matches in base query
|
||||
client.testJQ(params("q", "no_match_s:NO_MATCHES"
|
||||
, "json.facet", "{ processEmpty:true," +
|
||||
"pages:{ type:query, domain:{blockChildren:'type_s:book'} }" +
|
||||
",books:{ type:query, domain:{blockParent:'type_s:book'} }" +
|
||||
"}"
|
||||
)
|
||||
, "facets=={ count:0" +
|
||||
", pages:{count:0}" +
|
||||
", books:{count:0}" +
|
||||
"}"
|
||||
);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
public void XtestPercentiles() {
|
||||
AVLTreeDigest catA = new AVLTreeDigest(100);
|
||||
catA.add(4);
|
||||
|
|
Loading…
Reference in New Issue