mirror of https://github.com/apache/lucene.git
SOLR-7676: nested docs facet domain switching
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1685340 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b202035b93
commit
38292f791e
|
@ -116,6 +116,13 @@ New Features
|
||||||
|
|
||||||
* SOLR-7458: Expose HDFS Block Locality Metrics via JMX (Mike Drob via Mark Miller)
|
* SOLR-7458: Expose HDFS Block Locality Metrics via JMX (Mike Drob via Mark Miller)
|
||||||
|
|
||||||
|
* SOLR-7676: Faceting on nested objects / Block-join faceting with the new JSON Facet API.
|
||||||
|
Example: Assuming books with nested pages and an input domain of pages, the following
|
||||||
|
will switch the domain to books before faceting on the author field:
|
||||||
|
authors:{ type:terms, field:author, domain:{blockParent:"type:book"} }
|
||||||
|
(yonik)
|
||||||
|
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -26,8 +26,10 @@ import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
import org.apache.solr.request.SolrRequestInfo;
|
import org.apache.solr.request.SolrRequestInfo;
|
||||||
|
|
||||||
/**
|
/*
|
||||||
* Bridge between old style context and a real class
|
* Bridge between old style context and a real class.
|
||||||
|
* This is currently slightly more heavy weight than necessary because of the need to inherit from IdentityHashMap rather than
|
||||||
|
* instantiate it on demand (and the need to put "searcher" in the map)
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class QueryContext extends IdentityHashMap implements Closeable {
|
public class QueryContext extends IdentityHashMap implements Closeable {
|
||||||
|
@ -45,7 +47,7 @@ public class QueryContext extends IdentityHashMap implements Closeable {
|
||||||
/**
 * Creates a context bound to the given searcher.
 * The Solr-specific view of the searcher is kept only when the argument actually is a
 * SolrIndexSearcher; the plain IndexSearcher is always kept and also published in the map.
 */
public QueryContext(IndexSearcher searcher) {
  if (searcher instanceof SolrIndexSearcher) {
    this.searcher = (SolrIndexSearcher) searcher;
  } else {
    this.searcher = null;
  }
  this.indexSearcher = searcher;
  // see ValueSource.newContext()
  // TODO: move check to "get"?
  this.put("searcher", searcher);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -873,6 +873,25 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
|
||||||
getDocSet(query);
|
getDocSet(query);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public BitDocSet getDocSetBits(Query q) throws IOException {
|
||||||
|
DocSet answer = getDocSet(q);
|
||||||
|
if (answer instanceof BitDocSet) {
|
||||||
|
return (BitDocSet)answer;
|
||||||
|
}
|
||||||
|
|
||||||
|
FixedBitSet bs = new FixedBitSet(maxDoc());
|
||||||
|
DocIterator iter = answer.iterator();
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
bs.set(iter.nextDoc());
|
||||||
|
}
|
||||||
|
|
||||||
|
BitDocSet answerBits = new BitDocSet(bs , answer.size());
|
||||||
|
if (filterCache != null) {
|
||||||
|
filterCache.put(q, answerBits);
|
||||||
|
}
|
||||||
|
return answerBits;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the set of document ids matching a query.
|
* Returns the set of document ids matching a query.
|
||||||
* This method is cache-aware and attempts to retrieve the answer from the cache if possible.
|
* This method is cache-aware and attempts to retrieve the answer from the cache if possible.
|
||||||
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
package org.apache.solr.search.facet;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
|
import org.apache.solr.search.BitDocSet;
|
||||||
|
import org.apache.solr.search.DocIterator;
|
||||||
|
import org.apache.solr.search.DocSet;
|
||||||
|
import org.apache.solr.search.DocSetCollector;
|
||||||
|
import org.apache.solr.search.QueryContext;
|
||||||
|
|
||||||
|
/** @lucene.experimental */
|
||||||
|
public class BlockJoin {
|
||||||
|
|
||||||
|
/** acceptDocs will normally be used to avoid deleted documents from being generated as part of the answer DocSet (just use *:*)
|
||||||
|
* although it can be used to further constrain the generated documents.
|
||||||
|
*/
|
||||||
|
public static DocSet toChildren(DocSet parentInput, BitDocSet parentList, DocSet acceptDocs, QueryContext qcontext) throws IOException {
|
||||||
|
FixedBitSet parentBits = parentList.getBits();
|
||||||
|
DocSetCollector collector = new DocSetCollector(qcontext.searcher().maxDoc()>>6, qcontext.searcher().maxDoc());
|
||||||
|
DocIterator iter = parentInput.iterator();
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
int parentDoc = iter.nextDoc();
|
||||||
|
if (!parentList.exists(parentDoc) || parentDoc == 0) { // test for parentDoc==0 here to avoid passing -1 to prevSetBit later on
|
||||||
|
// not a parent, or parent has no children
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
int prevParent = parentBits.prevSetBit(parentDoc - 1);
|
||||||
|
for (int childDoc = prevParent+1; childDoc<parentDoc; childDoc++) {
|
||||||
|
if (acceptDocs != null && !acceptDocs.exists(childDoc)) continue; // only select live docs
|
||||||
|
collector.collect(childDoc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return collector.getDocSet();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** childInput may also contain parents (i.e. a parent or below will all roll up to that parent) */
|
||||||
|
public static DocSet toParents(DocSet childInput, BitDocSet parentList, QueryContext qcontext) throws IOException {
|
||||||
|
FixedBitSet parentBits = parentList.getBits();
|
||||||
|
DocSetCollector collector = new DocSetCollector(qcontext.searcher().maxDoc()>>6, qcontext.searcher().maxDoc());
|
||||||
|
DocIterator iter = childInput.iterator();
|
||||||
|
int currentParent = -1;
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
int childDoc = iter.nextDoc(); // TODO: skipping
|
||||||
|
if (childDoc <= currentParent) { // use <= since we also allow parents in the input
|
||||||
|
// we already visited this parent
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
currentParent = parentBits.nextSetBit(childDoc);
|
||||||
|
if (currentParent != DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
// only collect the parent the first time we skip to it
|
||||||
|
collector.collect( currentParent );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return collector.getDocSet();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -59,3 +59,4 @@ class FacetQueryProcessor extends FacetProcessor<FacetQuery> {
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,7 @@ import java.util.Map;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.params.FacetParams;
|
import org.apache.solr.common.params.FacetParams;
|
||||||
|
@ -41,6 +42,7 @@ import org.apache.solr.request.SolrQueryRequest;
|
||||||
import org.apache.solr.request.SolrRequestInfo;
|
import org.apache.solr.request.SolrRequestInfo;
|
||||||
import org.apache.solr.schema.IndexSchema;
|
import org.apache.solr.schema.IndexSchema;
|
||||||
import org.apache.solr.schema.SchemaField;
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
import org.apache.solr.search.BitDocSet;
|
||||||
import org.apache.solr.search.DocIterator;
|
import org.apache.solr.search.DocIterator;
|
||||||
import org.apache.solr.search.DocSet;
|
import org.apache.solr.search.DocSet;
|
||||||
import org.apache.solr.search.FunctionQParser;
|
import org.apache.solr.search.FunctionQParser;
|
||||||
|
@ -54,8 +56,17 @@ import org.apache.solr.search.SyntaxError;
|
||||||
public abstract class FacetRequest {
|
public abstract class FacetRequest {
|
||||||
protected Map<String,AggValueSource> facetStats; // per-bucket statistics
|
protected Map<String,AggValueSource> facetStats; // per-bucket statistics
|
||||||
protected Map<String,FacetRequest> subFacets; // list of facets
|
protected Map<String,FacetRequest> subFacets; // list of facets
|
||||||
protected List<String> excludeTags;
|
protected List<String> filters;
|
||||||
protected boolean processEmpty;
|
protected boolean processEmpty;
|
||||||
|
protected Domain domain;
|
||||||
|
|
||||||
|
// domain changes
|
||||||
|
public static class Domain {
|
||||||
|
public List<String> excludeTags;
|
||||||
|
public boolean toParent;
|
||||||
|
public boolean toChildren;
|
||||||
|
public String parents;
|
||||||
|
}
|
||||||
|
|
||||||
public FacetRequest() {
|
public FacetRequest() {
|
||||||
facetStats = new LinkedHashMap<>();
|
facetStats = new LinkedHashMap<>();
|
||||||
|
@ -140,7 +151,42 @@ class FacetProcessor<FacetRequestT extends FacetRequest> {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void handleDomainChanges() throws IOException {
|
protected void handleDomainChanges() throws IOException {
|
||||||
if (freq.excludeTags == null || freq.excludeTags.size() == 0) {
|
if (freq.domain == null) return;
|
||||||
|
handleFilterExclusions();
|
||||||
|
handleBlockJoin();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void handleBlockJoin() throws IOException {
|
||||||
|
if (!(freq.domain.toChildren || freq.domain.toParent)) return;
|
||||||
|
|
||||||
|
// TODO: avoid query parsing per-bucket somehow...
|
||||||
|
String parentStr = freq.domain.parents;
|
||||||
|
Query parentQuery;
|
||||||
|
try {
|
||||||
|
QParser parser = QParser.getParser(parentStr, null, fcontext.req);
|
||||||
|
parentQuery = parser.getQuery();
|
||||||
|
} catch (SyntaxError err) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing block join parent specification: " + parentStr);
|
||||||
|
}
|
||||||
|
|
||||||
|
BitDocSet parents = fcontext.searcher.getDocSetBits(parentQuery);
|
||||||
|
DocSet input = fcontext.base;
|
||||||
|
DocSet result;
|
||||||
|
|
||||||
|
if (freq.domain.toChildren) {
|
||||||
|
DocSet filt = fcontext.searcher.getDocSetBits( new MatchAllDocsQuery() );
|
||||||
|
result = BlockJoin.toChildren(input, parents, filt, fcontext.qcontext);
|
||||||
|
} else {
|
||||||
|
result = BlockJoin.toParents(input, parents, fcontext.qcontext);
|
||||||
|
}
|
||||||
|
|
||||||
|
fcontext.base = result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void handleFilterExclusions() throws IOException {
|
||||||
|
List<String> excludeTags = freq.domain.excludeTags;
|
||||||
|
|
||||||
|
if (excludeTags == null || excludeTags.size() == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -153,7 +199,7 @@ class FacetProcessor<FacetRequestT extends FacetRequest> {
|
||||||
}
|
}
|
||||||
|
|
||||||
IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<>();
|
IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<>();
|
||||||
for (String excludeTag : freq.excludeTags) {
|
for (String excludeTag : excludeTags) {
|
||||||
Object olst = tagMap.get(excludeTag);
|
Object olst = tagMap.get(excludeTag);
|
||||||
// tagMap has entries of List<String,List<QParser>>, but subject to change in the future
|
// tagMap has entries of List<String,List<QParser>>, but subject to change in the future
|
||||||
if (!(olst instanceof Collection)) continue;
|
if (!(olst instanceof Collection)) continue;
|
||||||
|
@ -528,10 +574,41 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private FacetRequest.Domain getDomain() {
|
||||||
|
if (facet.domain == null) {
|
||||||
|
facet.domain = new FacetRequest.Domain();
|
||||||
|
}
|
||||||
|
return facet.domain;
|
||||||
|
}
|
||||||
|
|
||||||
protected void parseCommonParams(Object o) {
|
protected void parseCommonParams(Object o) {
|
||||||
if (o instanceof Map) {
|
if (o instanceof Map) {
|
||||||
Map<String,Object> m = (Map<String,Object>)o;
|
Map<String,Object> m = (Map<String,Object>)o;
|
||||||
facet.excludeTags = getStringList(m, "excludeTags");
|
List<String> excludeTags = getStringList(m, "excludeTags");
|
||||||
|
if (excludeTags != null) {
|
||||||
|
getDomain().excludeTags = excludeTags;
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String,Object> domainMap = (Map<String,Object>) m.get("domain");
|
||||||
|
if (domainMap != null) {
|
||||||
|
excludeTags = getStringList(m, "excludeTags");
|
||||||
|
if (excludeTags != null) {
|
||||||
|
getDomain().excludeTags = excludeTags;
|
||||||
|
}
|
||||||
|
|
||||||
|
String blockParent = (String)domainMap.get("blockParent");
|
||||||
|
String blockChildren = (String)domainMap.get("blockChildren");
|
||||||
|
|
||||||
|
if (blockParent != null) {
|
||||||
|
getDomain().toParent = true;
|
||||||
|
getDomain().parents = blockParent;
|
||||||
|
} else if (blockChildren != null) {
|
||||||
|
getDomain().toChildren = true;
|
||||||
|
getDomain().parents = blockChildren;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -696,6 +773,34 @@ class FacetQueryParser extends FacetParser<FacetQuery> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*** not a separate type of parser for now...
|
||||||
|
class FacetBlockParentParser extends FacetParser<FacetBlockParent> {
|
||||||
|
public FacetBlockParentParser(FacetParser parent, String key) {
|
||||||
|
super(parent, key);
|
||||||
|
facet = new FacetBlockParent();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FacetBlockParent parse(Object arg) throws SyntaxError {
|
||||||
|
parseCommonParams(arg);
|
||||||
|
|
||||||
|
if (arg instanceof String) {
|
||||||
|
// just the field name...
|
||||||
|
facet.parents = (String)arg;
|
||||||
|
|
||||||
|
} else if (arg instanceof Map) {
|
||||||
|
Map<String, Object> m = (Map<String, Object>) arg;
|
||||||
|
facet.parents = getString(m, "parents", null);
|
||||||
|
|
||||||
|
parseSubs( m.get("facet") );
|
||||||
|
}
|
||||||
|
|
||||||
|
return facet;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
***/
|
||||||
|
|
||||||
|
|
||||||
class FacetFieldParser extends FacetParser<FacetField> {
|
class FacetFieldParser extends FacetParser<FacetField> {
|
||||||
public FacetFieldParser(FacetParser parent, String key) {
|
public FacetFieldParser(FacetParser parent, String key) {
|
||||||
super(parent, key);
|
super(parent, key);
|
||||||
|
|
|
@ -1072,6 +1072,82 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBlockJoin() throws Exception {
|
||||||
|
doBlockJoin(Client.localClient());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void doBlockJoin(Client client) throws Exception {
|
||||||
|
ModifiableSolrParams p = params("rows","0");
|
||||||
|
|
||||||
|
client.deleteByQuery("*:*", null);
|
||||||
|
|
||||||
|
SolrInputDocument parent;
|
||||||
|
parent = sdoc("id", "1", "type_s","book", "book_s","A", "v_t","q");
|
||||||
|
client.add(parent, null);
|
||||||
|
|
||||||
|
parent = sdoc("id", "2", "type_s","book", "book_s","B", "v_t","q w");
|
||||||
|
parent.addChildDocument( sdoc("id","2.1", "type_s","page", "page_s","a", "v_t","x y z") );
|
||||||
|
parent.addChildDocument( sdoc("id","2.2", "type_s","page", "page_s","b", "v_t","x y ") );
|
||||||
|
parent.addChildDocument( sdoc("id","2.3", "type_s","page", "page_s","c", "v_t"," y z" ) );
|
||||||
|
client.add(parent, null);
|
||||||
|
|
||||||
|
parent = sdoc("id", "3", "type_s","book", "book_s","C", "v_t","q w e");
|
||||||
|
parent.addChildDocument( sdoc("id","3.1", "type_s","page", "page_s","d", "v_t","x ") );
|
||||||
|
parent.addChildDocument( sdoc("id","3.2", "type_s","page", "page_s","e", "v_t"," y ") );
|
||||||
|
parent.addChildDocument( sdoc("id","3.3", "type_s","page", "page_s","f", "v_t"," z") );
|
||||||
|
client.add(parent, null);
|
||||||
|
|
||||||
|
parent = sdoc("id", "4", "type_s","book", "book_s","D", "v_t","e");
|
||||||
|
client.add(parent, null);
|
||||||
|
|
||||||
|
client.commit();
|
||||||
|
|
||||||
|
client.testJQ(params(p, "q", "*:*"
|
||||||
|
, "json.facet", "{ " +
|
||||||
|
"pages:{ type:query, domain:{blockChildren:'type_s:book'} , facet:{ x:{field:v_t} } }" +
|
||||||
|
",pages2:{type:terms, field:v_t, domain:{blockChildren:'type_s:book'} }" +
|
||||||
|
",books:{ type:query, domain:{blockParent:'type_s:book'} , facet:{ x:{field:v_t} } }" +
|
||||||
|
",books2:{type:terms, field:v_t, domain:{blockParent:'type_s:book'} }" +
|
||||||
|
",pageof3:{ type:query, q:'id:3', facet : { x : { type:terms, field:page_s, domain:{blockChildren:'type_s:book'}}} }" +
|
||||||
|
",bookof22:{ type:query, q:'id:2.2', facet : { x : { type:terms, field:book_s, domain:{blockParent:'type_s:book'}}} }" +
|
||||||
|
",missing_blockParent:{ type:query, domain:{blockParent:'type_s:does_not_exist'} }" +
|
||||||
|
",missing_blockChildren:{ type:query, domain:{blockChildren:'type_s:does_not_exist'} }" +
|
||||||
|
"}"
|
||||||
|
)
|
||||||
|
, "facets=={ count:10" +
|
||||||
|
", pages:{count:6 , x:{buckets:[ {val:y,count:4},{val:x,count:3},{val:z,count:3} ]} }" +
|
||||||
|
", pages2:{ buckets:[ {val:y,count:4},{val:x,count:3},{val:z,count:3} ] }" +
|
||||||
|
", books:{count:4 , x:{buckets:[ {val:q,count:3},{val:e,count:2},{val:w,count:2} ]} }" +
|
||||||
|
", books2:{ buckets:[ {val:q,count:3},{val:e,count:2},{val:w,count:2} ] }" +
|
||||||
|
", pageof3:{count:1 , x:{buckets:[ {val:d,count:1},{val:e,count:1},{val:f,count:1} ]} }" +
|
||||||
|
", bookof22:{count:1 , x:{buckets:[ {val:B,count:1} ]} }" +
|
||||||
|
", missing_blockParent:{count:0}" +
|
||||||
|
", missing_blockChildren:{count:0}" +
|
||||||
|
"}"
|
||||||
|
);
|
||||||
|
|
||||||
|
// no matches in base query
|
||||||
|
client.testJQ(params("q", "no_match_s:NO_MATCHES"
|
||||||
|
, "json.facet", "{ processEmpty:true," +
|
||||||
|
"pages:{ type:query, domain:{blockChildren:'type_s:book'} }" +
|
||||||
|
",books:{ type:query, domain:{blockParent:'type_s:book'} }" +
|
||||||
|
"}"
|
||||||
|
)
|
||||||
|
, "facets=={ count:0" +
|
||||||
|
", pages:{count:0}" +
|
||||||
|
", books:{count:0}" +
|
||||||
|
"}"
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public void XtestPercentiles() {
|
public void XtestPercentiles() {
|
||||||
AVLTreeDigest catA = new AVLTreeDigest(100);
|
AVLTreeDigest catA = new AVLTreeDigest(100);
|
||||||
catA.add(4);
|
catA.add(4);
|
||||||
|
|
Loading…
Reference in New Issue