mirror of https://github.com/apache/lucene.git
SOLR-7477: implement facet excludeTags
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1676743 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7d48921099
commit
ecc3a5a786
|
@ -153,6 +153,14 @@ New Features
|
|||
|
||||
* SOLR-7437: Make HDFS transaction log replication factor configurable. (Mark Miller)
|
||||
|
||||
* SOLR-7477: Multi-select faceting support for the Facet Module via the "excludeTags"
|
||||
parameter which disregards any matching tagged filters for that facet. Example:
|
||||
& q=shoes
|
||||
& fq={!tag=COLOR}color:blue
|
||||
& json.facet={ colors:{type:terms, field:color, excludeTags=COLOR} }
|
||||
(yonik)
|
||||
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -194,6 +194,7 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
|
|||
|
||||
@Override
|
||||
public void process() throws IOException {
|
||||
super.process();
|
||||
sf = fcontext.searcher.getSchema().getField(freq.field);
|
||||
response = getFieldCacheCounts();
|
||||
}
|
||||
|
@ -340,10 +341,9 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
|
|||
|
||||
// handle sub-facets for this bucket
|
||||
if (freq.getSubFacets().size() > 0) {
|
||||
FacetContext subContext = fcontext.sub();
|
||||
subContext.base = fcontext.searcher.getDocSet(new TermQuery(new Term(sf.getName(), br.clone())), fcontext.base);
|
||||
TermQuery filter = new TermQuery(new Term(sf.getName(), br.clone()));
|
||||
try {
|
||||
fillBucketSubs(bucket, subContext);
|
||||
processSubs(bucket, filter, fcontext.searcher.getDocSet(filter, fcontext.base) );
|
||||
} finally {
|
||||
// subContext.base.decref(); // OFF-HEAP
|
||||
// subContext.base = null; // do not modify context after creation... there may be deferred execution (i.e. streaming)
|
||||
|
@ -368,13 +368,11 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
|
|||
}
|
||||
|
||||
if (freq.getSubFacets().size() > 0) {
|
||||
FacetContext subContext = fcontext.sub();
|
||||
// TODO: we can do better than this!
|
||||
if (missingDocSet == null) {
|
||||
missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
|
||||
}
|
||||
subContext.base = missingDocSet;
|
||||
fillBucketSubs(missingBucket, subContext);
|
||||
processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet);
|
||||
}
|
||||
|
||||
res.add("missing", missingBucket);
|
||||
|
@ -542,6 +540,8 @@ class FacetFieldProcessorStream extends FacetFieldProcessor implements Closeable
|
|||
|
||||
@Override
|
||||
public void process() throws IOException {
|
||||
super.process();
|
||||
|
||||
// We need to keep the fcontext open after processing is done (since we will be streaming in the response writer).
|
||||
// But if the connection is broken, we want to clean up.
|
||||
// fcontext.base.incref(); // OFF-HEAP
|
||||
|
@ -790,13 +790,15 @@ class FacetFieldProcessorStream extends FacetFieldProcessor implements Closeable
|
|||
|
||||
// OK, we have a good bucket to return... first get bucket value before moving to next term
|
||||
Object bucketVal = sf.getType().toObject(sf, term);
|
||||
BytesRef termCopy = BytesRef.deepCopyOf(term);
|
||||
term = termsEnum.next();
|
||||
|
||||
SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
|
||||
bucket.add("val", bucketVal);
|
||||
addStats(bucket, 0);
|
||||
if (hasSubFacets) {
|
||||
processSubs(bucket, termSet);
|
||||
TermQuery filter = new TermQuery(new Term(freq.field, termCopy));
|
||||
processSubs(bucket, filter, termSet);
|
||||
}
|
||||
|
||||
// TODO... termSet needs to stick around for streaming sub-facets?
|
||||
|
|
|
@ -52,6 +52,7 @@ class FacetQueryProcessor extends FacetProcessor<FacetQuery> {
|
|||
|
||||
@Override
|
||||
public void process() throws IOException {
|
||||
super.process();
|
||||
response = new SimpleOrderedMap<>();
|
||||
fillBucket(response, freq.q);
|
||||
}
|
||||
|
|
|
@ -70,6 +70,8 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
|
|||
|
||||
@Override
|
||||
public void process() throws IOException {
|
||||
super.process();
|
||||
|
||||
// Under the normal mincount=0, each shard will need to return 0 counts since we don't calculate buckets at the top level.
|
||||
// But if mincount>0 then our sub mincount can be set to 1.
|
||||
|
||||
|
@ -223,6 +225,9 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
|
|||
|
||||
int slotCount = rangeList.size() + otherList.size();
|
||||
intersections = new DocSet[slotCount];
|
||||
filters = new Query[slotCount];
|
||||
|
||||
|
||||
createAccs(fcontext.base.size(), slotCount);
|
||||
prepareForCollection();
|
||||
|
||||
|
@ -261,12 +266,14 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
|
|||
return res;
|
||||
}
|
||||
|
||||
private Query[] filters;
|
||||
private DocSet[] intersections;
|
||||
private void rangeStats(Range range, int slot) throws IOException {
|
||||
Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
|
||||
// TODO: specialize count only
|
||||
DocSet intersection = fcontext.searcher.getDocSet(rangeQ, fcontext.base);
|
||||
intersections[slot] = intersection; // save for later
|
||||
filters[slot] = rangeQ;
|
||||
intersections[slot] = intersection; // save for later // TODO: only save if number of slots is small enough?
|
||||
int num = collect(intersection, slot);
|
||||
countAcc.incrementCount(slot, num); // TODO: roll this into collect()
|
||||
}
|
||||
|
@ -275,11 +282,8 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
|
|||
// handle sub-facets for this bucket
|
||||
if (freq.getSubFacets().size() > 0) {
|
||||
DocSet subBase = intersections[slot];
|
||||
if (subBase.size() == 0) return;
|
||||
FacetContext subContext = fcontext.sub();
|
||||
subContext.base = subBase;
|
||||
try {
|
||||
fillBucketSubs(bucket, subContext);
|
||||
processSubs(bucket, filters[slot], subBase);
|
||||
} finally {
|
||||
// subContext.base.decref(); // OFF-HEAP
|
||||
// subContext.base = null; // do not modify context after creation... there may be deferred execution (i.e. streaming)
|
||||
|
|
|
@ -19,19 +19,26 @@ package org.apache.solr.search.facet;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.EnumSet;
|
||||
import java.util.IdentityHashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.handler.component.ResponseBuilder;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestInfo;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
|
@ -47,7 +54,7 @@ import org.apache.solr.search.SyntaxError;
|
|||
public abstract class FacetRequest {
|
||||
protected Map<String,AggValueSource> facetStats; // per-bucket statistics
|
||||
protected Map<String,FacetRequest> subFacets; // list of facets
|
||||
protected List<String> excludeFilters;
|
||||
protected List<String> excludeTags;
|
||||
protected boolean processEmpty;
|
||||
|
||||
public FacetRequest() {
|
||||
|
@ -84,6 +91,7 @@ class FacetContext {
|
|||
QueryContext qcontext;
|
||||
SolrQueryRequest req; // TODO: replace with params?
|
||||
SolrIndexSearcher searcher;
|
||||
Query filter; // TODO: keep track of as a DocSet or as a Query?
|
||||
DocSet base;
|
||||
FacetContext parent;
|
||||
int flags;
|
||||
|
@ -92,15 +100,22 @@ class FacetContext {
|
|||
return (flags & IS_SHARD) != 0;
|
||||
}
|
||||
|
||||
public FacetContext sub() {
|
||||
/**
|
||||
* @param filter The filter for the bucket that resulted in this context/domain. Can be null if this is the root context.
|
||||
* @param domain The resulting set of documents for this facet.
|
||||
*/
|
||||
public FacetContext sub(Query filter, DocSet domain) {
|
||||
FacetContext ctx = new FacetContext();
|
||||
ctx.parent = this;
|
||||
ctx.base = domain;
|
||||
ctx.filter = filter;
|
||||
|
||||
// carry over from parent
|
||||
ctx.flags = flags;
|
||||
ctx.qcontext = qcontext;
|
||||
ctx.req = req;
|
||||
ctx.searcher = searcher;
|
||||
ctx.base = base;
|
||||
|
||||
ctx.parent = this;
|
||||
return ctx;
|
||||
}
|
||||
}
|
||||
|
@ -121,10 +136,69 @@ class FacetProcessor<FacetRequestT extends FacetRequest> {
|
|||
}
|
||||
|
||||
public void process() throws IOException {
|
||||
|
||||
|
||||
handleDomainChanges();
|
||||
}
|
||||
|
||||
protected void handleDomainChanges() throws IOException {
|
||||
if (freq.excludeTags == null || freq.excludeTags.size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: somehow remove responsebuilder dependency
|
||||
ResponseBuilder rb = SolrRequestInfo.getRequestInfo().getResponseBuilder();
|
||||
Map tagMap = (Map) rb.req.getContext().get("tags");
|
||||
if (tagMap == null) {
|
||||
// no filters were tagged
|
||||
return;
|
||||
}
|
||||
|
||||
IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<>();
|
||||
for (String excludeTag : freq.excludeTags) {
|
||||
Object olst = tagMap.get(excludeTag);
|
||||
// tagMap has entries of List<String,List<QParser>>, but subject to change in the future
|
||||
if (!(olst instanceof Collection)) continue;
|
||||
for (Object o : (Collection<?>)olst) {
|
||||
if (!(o instanceof QParser)) continue;
|
||||
QParser qp = (QParser)o;
|
||||
try {
|
||||
excludeSet.put(qp.getQuery(), Boolean.TRUE);
|
||||
} catch (SyntaxError syntaxError) {
|
||||
// This should not happen since we should only be retrieving a previously parsed query
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (excludeSet.size() == 0) return;
|
||||
|
||||
List<Query> qlist = new ArrayList<>();
|
||||
|
||||
// add the base query
|
||||
if (!excludeSet.containsKey(rb.getQuery())) {
|
||||
qlist.add(rb.getQuery());
|
||||
}
|
||||
|
||||
// add the filters
|
||||
if (rb.getFilters() != null) {
|
||||
for (Query q : rb.getFilters()) {
|
||||
if (!excludeSet.containsKey(q)) {
|
||||
qlist.add(q);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// now walk back up the context tree
|
||||
// TODO: we lose parent exclusions...
|
||||
for (FacetContext curr = fcontext; curr != null; curr = curr.parent) {
|
||||
if (curr.filter != null) {
|
||||
qlist.add( curr.filter );
|
||||
}
|
||||
}
|
||||
|
||||
// recompute the base domain
|
||||
fcontext.base = fcontext.searcher.getDocSet(qlist);
|
||||
}
|
||||
|
||||
|
||||
public Object getResponse() {
|
||||
return null;
|
||||
}
|
||||
|
@ -171,8 +245,19 @@ class FacetProcessor<FacetRequestT extends FacetRequest> {
|
|||
}
|
||||
|
||||
|
||||
protected void fillBucketSubs(SimpleOrderedMap<Object> response, FacetContext subContext) throws IOException {
|
||||
protected void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain) throws IOException {
|
||||
|
||||
// TODO: what if a zero bucket has a sub-facet with an exclusion that would yield results?
|
||||
// should we check for domain-altering exclusions, or even ask the sub-facet for
|
||||
// it's domain and then only skip it if it's 0?
|
||||
|
||||
if (domain == null || domain.size() == 0 && !freq.processEmpty) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (Map.Entry<String,FacetRequest> sub : freq.getSubFacets().entrySet()) {
|
||||
// make a new context for each sub-facet since they can change the domain
|
||||
FacetContext subContext = fcontext.sub(filter, domain);
|
||||
FacetProcessor subProcessor = sub.getValue().createFacetProcessor(subContext);
|
||||
subProcessor.process();
|
||||
response.add( sub.getKey(), subProcessor.getResponse() );
|
||||
|
@ -235,9 +320,6 @@ class FacetProcessor<FacetRequestT extends FacetRequest> {
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
public void fillBucket(SimpleOrderedMap<Object> bucket, Query q) throws IOException {
|
||||
boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;
|
||||
|
||||
|
@ -264,7 +346,7 @@ class FacetProcessor<FacetRequestT extends FacetRequest> {
|
|||
|
||||
try {
|
||||
processStats(bucket, result, (int) count);
|
||||
processSubs(bucket, result);
|
||||
processSubs(bucket, q, result);
|
||||
} finally {
|
||||
if (result != null) {
|
||||
// result.decref(); // OFF-HEAP
|
||||
|
@ -273,23 +355,6 @@ class FacetProcessor<FacetRequestT extends FacetRequest> {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
protected void processSubs(SimpleOrderedMap<Object> bucket, DocSet result) throws IOException {
|
||||
// TODO: process exclusions, etc
|
||||
|
||||
if (result == null || result.size() == 0 && !freq.processEmpty) {
|
||||
return;
|
||||
}
|
||||
|
||||
FacetContext subContext = fcontext.sub();
|
||||
subContext.base = result;
|
||||
|
||||
fillBucketSubs(bucket, subContext);
|
||||
}
|
||||
|
||||
|
||||
public static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
|
||||
SchemaField sf = searcher.getSchema().getField(fieldName);
|
||||
DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false));
|
||||
|
@ -298,6 +363,14 @@ class FacetProcessor<FacetRequestT extends FacetRequest> {
|
|||
return answer;
|
||||
}
|
||||
|
||||
public static Query getFieldMissingQuery(SolrIndexSearcher searcher, String fieldName) throws IOException {
|
||||
SchemaField sf = searcher.getSchema().getField(fieldName);
|
||||
Query hasVal = sf.getType().getRangeQuery(null, sf, null, null, false, false);
|
||||
BooleanQuery noVal = new BooleanQuery();
|
||||
noVal.add(hasVal, BooleanClause.Occur.MUST_NOT);
|
||||
return noVal;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -450,6 +523,14 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
|
|||
}
|
||||
|
||||
|
||||
protected void parseCommonParams(Object o) {
|
||||
if (o instanceof Map) {
|
||||
Map<String,Object> m = (Map<String,Object>)o;
|
||||
facet.excludeTags = getStringList(m, "excludeTags");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public String getField(Map<String,Object> args) {
|
||||
Object fieldName = args.get("field"); // TODO: pull out into defined constant
|
||||
if (fieldName == null) {
|
||||
|
@ -520,6 +601,20 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
|
|||
return (String)o;
|
||||
}
|
||||
|
||||
public List<String> getStringList(Map<String,Object> args, String paramName) {
|
||||
Object o = args.get(paramName);
|
||||
if (o == null) {
|
||||
return null;
|
||||
}
|
||||
if (o instanceof List) {
|
||||
return (List<String>)o;
|
||||
}
|
||||
if (o instanceof String) {
|
||||
return StrUtils.splitSmart((String)o, ",", true);
|
||||
}
|
||||
|
||||
throw err("Expected list of string or comma separated string values.");
|
||||
}
|
||||
|
||||
public IndexSchema getSchema() {
|
||||
return parent.getSchema();
|
||||
|
@ -566,6 +661,8 @@ class FacetQueryParser extends FacetParser<FacetQuery> {
|
|||
|
||||
@Override
|
||||
public FacetQuery parse(Object arg) throws SyntaxError {
|
||||
parseCommonParams(arg);
|
||||
|
||||
String qstring = null;
|
||||
if (arg instanceof String) {
|
||||
// just the field name...
|
||||
|
@ -601,7 +698,7 @@ class FacetFieldParser extends FacetParser<FacetField> {
|
|||
}
|
||||
|
||||
public FacetField parse(Object arg) throws SyntaxError {
|
||||
|
||||
parseCommonParams(arg);
|
||||
if (arg instanceof String) {
|
||||
// just the field name...
|
||||
facet.field = (String)arg;
|
||||
|
@ -674,6 +771,8 @@ class FacetRangeParser extends FacetParser<FacetRange> {
|
|||
}
|
||||
|
||||
public FacetRange parse(Object arg) throws SyntaxError {
|
||||
parseCommonParams(arg);
|
||||
|
||||
if (!(arg instanceof Map)) {
|
||||
throw err("Missing range facet arguments");
|
||||
}
|
||||
|
|
|
@ -818,15 +818,32 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
);
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// multi-select / exclude tagged filters via excludeTags
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// nested query facets on subset
|
||||
client.testJQ(params(p, "q", "*:*", "fq","{!tag=abc}id:(2 3)"
|
||||
, "json.facet", "{ " +
|
||||
" f1:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz,qaz]}}" +
|
||||
",f2:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:abc }}" +
|
||||
",f3:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:'xyz,abc,qaz' }}" +
|
||||
",f4:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz , abc , qaz] }}" +
|
||||
",f5:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz,qaz]}}" + // this is repeated, but it did fail when a single context was shared among sub-facets
|
||||
"}"
|
||||
)
|
||||
, "facets=={ 'count':2, " +
|
||||
" 'f1':{'count':1, 'nj':{'count':1}, 'ny':{'count':0}}" +
|
||||
",'f2':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" +
|
||||
",'f3':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" +
|
||||
",'f4':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" +
|
||||
",'f5':{'count':1, 'nj':{'count':1}, 'ny':{'count':0}}" +
|
||||
"}"
|
||||
);
|
||||
|
||||
|
||||
|
||||
|
||||
// TODO:
|
||||
// numdocs('query') stat (don't make a bucket... just a count)
|
||||
// missing(field)
|
||||
// make missing configurable in min, max, etc
|
||||
// exclusions
|
||||
// zeroes
|
||||
// instead of json.facet make it facet?
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue