From ecc3a5a78673664aa5d9f7900ae5b1d4cb0bc704 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Wed, 29 Apr 2015 13:28:33 +0000 Subject: [PATCH] SOLR-7477: implement facet excludeTags git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1676743 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 8 + .../apache/solr/search/facet/FacetField.java | 16 +- .../apache/solr/search/facet/FacetQuery.java | 1 + .../apache/solr/search/facet/FacetRange.java | 14 +- .../solr/search/facet/FacetRequest.java | 157 ++++++++++++++---- .../solr/search/facet/TestJsonFacets.java | 31 +++- 6 files changed, 179 insertions(+), 48 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index c0e6e241513..41e0111ff3d 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -153,6 +153,14 @@ New Features * SOLR-7437: Make HDFS transaction log replication factor configurable. (Mark Miller) +* SOLR-7477: Multi-select faceting support for the Facet Module via the "excludeTags" + parameter which disregards any matching tagged filters for that facet. Example: + & q=shoes + & fq={!tag=COLOR}color:blue + & json.facet={ colors:{type:terms, field:color, excludeTags=COLOR} } + (yonik) + + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java index b908ab2705b..cd2d0d0f37c 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java @@ -194,6 +194,7 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor { @Override public void process() throws IOException { + super.process(); sf = fcontext.searcher.getSchema().getField(freq.field); response = getFieldCacheCounts(); } @@ -340,10 +341,9 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor { // handle sub-facets for this bucket if (freq.getSubFacets().size() > 0) { - FacetContext subContext = fcontext.sub(); - subContext.base = fcontext.searcher.getDocSet(new TermQuery(new Term(sf.getName(), br.clone())), fcontext.base); + TermQuery filter = new TermQuery(new Term(sf.getName(), br.clone())); try { - fillBucketSubs(bucket, subContext); + processSubs(bucket, filter, fcontext.searcher.getDocSet(filter, fcontext.base) ); } finally { // subContext.base.decref(); // OFF-HEAP // subContext.base = null; // do not modify context after creation... there may be deferred execution (i.e. streaming) @@ -368,13 +368,11 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor { } if (freq.getSubFacets().size() > 0) { - FacetContext subContext = fcontext.sub(); // TODO: we can do better than this! if (missingDocSet == null) { missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field); } - subContext.base = missingDocSet; - fillBucketSubs(missingBucket, subContext); + processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet); } res.add("missing", missingBucket); @@ -542,6 +540,8 @@ class FacetFieldProcessorStream extends FacetFieldProcessor implements Closeable @Override public void process() throws IOException { + super.process(); + // We need to keep the fcontext open after processing is done (since we will be streaming in the response writer). // But if the connection is broken, we want to clean up. // fcontext.base.incref(); // OFF-HEAP @@ -790,13 +790,15 @@ class FacetFieldProcessorStream extends FacetFieldProcessor implements Closeable // OK, we have a good bucket to return... first get bucket value before moving to next term Object bucketVal = sf.getType().toObject(sf, term); + BytesRef termCopy = BytesRef.deepCopyOf(term); term = termsEnum.next(); SimpleOrderedMap bucket = new SimpleOrderedMap<>(); bucket.add("val", bucketVal); addStats(bucket, 0); if (hasSubFacets) { - processSubs(bucket, termSet); + TermQuery filter = new TermQuery(new Term(freq.field, termCopy)); + processSubs(bucket, filter, termSet); } // TODO... termSet needs to stick around for streaming sub-facets? diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java index 95640b27505..0e25947133d 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java @@ -52,6 +52,7 @@ class FacetQueryProcessor extends FacetProcessor { @Override public void process() throws IOException { + super.process(); response = new SimpleOrderedMap<>(); fillBucket(response, freq.q); } diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java index 24c607a3f34..74be2b8c880 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java @@ -70,6 +70,8 @@ class FacetRangeProcessor extends FacetProcessor { @Override public void process() throws IOException { + super.process(); + // Under the normal mincount=0, each shard will need to return 0 counts since we don't calculate buckets at the top level. // But if mincount>0 then our sub mincount can be set to 1. @@ -223,6 +225,9 @@ class FacetRangeProcessor extends FacetProcessor { int slotCount = rangeList.size() + otherList.size(); intersections = new DocSet[slotCount]; + filters = new Query[slotCount]; + + createAccs(fcontext.base.size(), slotCount); prepareForCollection(); @@ -261,12 +266,14 @@ class FacetRangeProcessor extends FacetProcessor { return res; } + private Query[] filters; private DocSet[] intersections; private void rangeStats(Range range, int slot) throws IOException { Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper); // TODO: specialize count only DocSet intersection = fcontext.searcher.getDocSet(rangeQ, fcontext.base); - intersections[slot] = intersection; // save for later + filters[slot] = rangeQ; + intersections[slot] = intersection; // save for later // TODO: only save if number of slots is small enough? int num = collect(intersection, slot); countAcc.incrementCount(slot, num); // TODO: roll this into collect() } @@ -275,11 +282,8 @@ class FacetRangeProcessor extends FacetProcessor { // handle sub-facets for this bucket if (freq.getSubFacets().size() > 0) { DocSet subBase = intersections[slot]; - if (subBase.size() == 0) return; - FacetContext subContext = fcontext.sub(); - subContext.base = subBase; try { - fillBucketSubs(bucket, subContext); + processSubs(bucket, filters[slot], subBase); } finally { // subContext.base.decref(); // OFF-HEAP // subContext.base = null; // do not modify context after creation... there may be deferred execution (i.e. streaming) diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java index 51d81d99e72..288d1a82d37 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java @@ -19,19 +19,26 @@ package org.apache.solr.search.facet; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; import java.util.EnumSet; +import java.util.IdentityHashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.StrUtils; +import org.apache.solr.handler.component.ResponseBuilder; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.DocIterator; @@ -47,7 +54,7 @@ import org.apache.solr.search.SyntaxError; public abstract class FacetRequest { protected Map facetStats; // per-bucket statistics protected Map subFacets; // list of facets - protected List excludeFilters; + protected List excludeTags; protected boolean processEmpty; public FacetRequest() { @@ -84,6 +91,7 @@ class FacetContext { QueryContext qcontext; SolrQueryRequest req; // TODO: replace with params? SolrIndexSearcher searcher; + Query filter; // TODO: keep track of as a DocSet or as a Query? DocSet base; FacetContext parent; int flags; @@ -92,15 +100,22 @@ class FacetContext { return (flags & IS_SHARD) != 0; } - public FacetContext sub() { + /** + * @param filter The filter for the bucket that resulted in this context/domain. Can be null if this is the root context. + * @param domain The resulting set of documents for this facet. + */ + public FacetContext sub(Query filter, DocSet domain) { FacetContext ctx = new FacetContext(); + ctx.parent = this; + ctx.base = domain; + ctx.filter = filter; + + // carry over from parent ctx.flags = flags; ctx.qcontext = qcontext; ctx.req = req; ctx.searcher = searcher; - ctx.base = base; - ctx.parent = this; return ctx; } } @@ -121,10 +136,69 @@ class FacetProcessor { } public void process() throws IOException { - - + handleDomainChanges(); } + protected void handleDomainChanges() throws IOException { + if (freq.excludeTags == null || freq.excludeTags.size() == 0) { + return; + } + + // TODO: somehow remove responsebuilder dependency + ResponseBuilder rb = SolrRequestInfo.getRequestInfo().getResponseBuilder(); + Map tagMap = (Map) rb.req.getContext().get("tags"); + if (tagMap == null) { + // no filters were tagged + return; + } + + IdentityHashMap excludeSet = new IdentityHashMap<>(); + for (String excludeTag : freq.excludeTags) { + Object olst = tagMap.get(excludeTag); + // tagMap has entries of List>, but subject to change in the future + if (!(olst instanceof Collection)) continue; + for (Object o : (Collection)olst) { + if (!(o instanceof QParser)) continue; + QParser qp = (QParser)o; + try { + excludeSet.put(qp.getQuery(), Boolean.TRUE); + } catch (SyntaxError syntaxError) { + // This should not happen since we should only be retrieving a previously parsed query + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError); + } + } + } + if (excludeSet.size() == 0) return; + + List qlist = new ArrayList<>(); + + // add the base query + if (!excludeSet.containsKey(rb.getQuery())) { + qlist.add(rb.getQuery()); + } + + // add the filters + if (rb.getFilters() != null) { + for (Query q : rb.getFilters()) { + if (!excludeSet.containsKey(q)) { + qlist.add(q); + } + } + } + + // now walk back up the context tree + // TODO: we lose parent exclusions... + for (FacetContext curr = fcontext; curr != null; curr = curr.parent) { + if (curr.filter != null) { + qlist.add( curr.filter ); + } + } + + // recompute the base domain + fcontext.base = fcontext.searcher.getDocSet(qlist); + } + + public Object getResponse() { return null; } @@ -171,8 +245,19 @@ class FacetProcessor { } - protected void fillBucketSubs(SimpleOrderedMap response, FacetContext subContext) throws IOException { + protected void processSubs(SimpleOrderedMap response, Query filter, DocSet domain) throws IOException { + + // TODO: what if a zero bucket has a sub-facet with an exclusion that would yield results? + // should we check for domain-altering exclusions, or even ask the sub-facet for + // it's domain and then only skip it if it's 0? + + if (domain == null || domain.size() == 0 && !freq.processEmpty) { + return; + } + for (Map.Entry sub : freq.getSubFacets().entrySet()) { + // make a new context for each sub-facet since they can change the domain + FacetContext subContext = fcontext.sub(filter, domain); FacetProcessor subProcessor = sub.getValue().createFacetProcessor(subContext); subProcessor.process(); response.add( sub.getKey(), subProcessor.getResponse() ); @@ -235,9 +320,6 @@ class FacetProcessor { } - - - public void fillBucket(SimpleOrderedMap bucket, Query q) throws IOException { boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0; @@ -264,7 +346,7 @@ class FacetProcessor { try { processStats(bucket, result, (int) count); - processSubs(bucket, result); + processSubs(bucket, q, result); } finally { if (result != null) { // result.decref(); // OFF-HEAP @@ -273,23 +355,6 @@ class FacetProcessor { } } - - - - protected void processSubs(SimpleOrderedMap bucket, DocSet result) throws IOException { - // TODO: process exclusions, etc - - if (result == null || result.size() == 0 && !freq.processEmpty) { - return; - } - - FacetContext subContext = fcontext.sub(); - subContext.base = result; - - fillBucketSubs(bucket, subContext); - } - - public static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException { SchemaField sf = searcher.getSchema().getField(fieldName); DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false)); @@ -298,6 +363,14 @@ class FacetProcessor { return answer; } + public static Query getFieldMissingQuery(SolrIndexSearcher searcher, String fieldName) throws IOException { + SchemaField sf = searcher.getSchema().getField(fieldName); + Query hasVal = sf.getType().getRangeQuery(null, sf, null, null, false, false); + BooleanQuery noVal = new BooleanQuery(); + noVal.add(hasVal, BooleanClause.Occur.MUST_NOT); + return noVal; + } + } @@ -450,6 +523,14 @@ abstract class FacetParser { } + protected void parseCommonParams(Object o) { + if (o instanceof Map) { + Map m = (Map)o; + facet.excludeTags = getStringList(m, "excludeTags"); + } + } + + public String getField(Map args) { Object fieldName = args.get("field"); // TODO: pull out into defined constant if (fieldName == null) { @@ -520,6 +601,20 @@ abstract class FacetParser { return (String)o; } + public List getStringList(Map args, String paramName) { + Object o = args.get(paramName); + if (o == null) { + return null; + } + if (o instanceof List) { + return (List)o; + } + if (o instanceof String) { + return StrUtils.splitSmart((String)o, ",", true); + } + + throw err("Expected list of string or comma separated string values."); + } public IndexSchema getSchema() { return parent.getSchema(); @@ -566,6 +661,8 @@ class FacetQueryParser extends FacetParser { @Override public FacetQuery parse(Object arg) throws SyntaxError { + parseCommonParams(arg); + String qstring = null; if (arg instanceof String) { // just the field name... @@ -601,7 +698,7 @@ class FacetFieldParser extends FacetParser { } public FacetField parse(Object arg) throws SyntaxError { - + parseCommonParams(arg); if (arg instanceof String) { // just the field name... facet.field = (String)arg; @@ -674,6 +771,8 @@ class FacetRangeParser extends FacetParser { } public FacetRange parse(Object arg) throws SyntaxError { + parseCommonParams(arg); + if (!(arg instanceof Map)) { throw err("Missing range facet arguments"); } diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java index 1d17ef5843c..e444c23bffa 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java @@ -818,15 +818,32 @@ public class TestJsonFacets extends SolrTestCaseHS { ); + //////////////////////////////////////////////////////////////////////////////////////////// + // multi-select / exclude tagged filters via excludeTags + //////////////////////////////////////////////////////////////////////////////////////////// + + // nested query facets on subset + client.testJQ(params(p, "q", "*:*", "fq","{!tag=abc}id:(2 3)" + , "json.facet", "{ " + + " f1:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz,qaz]}}" + + ",f2:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:abc }}" + + ",f3:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:'xyz,abc,qaz' }}" + + ",f4:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz , abc , qaz] }}" + + ",f5:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz,qaz]}}" + // this is repeated, but it did fail when a single context was shared among sub-facets + "}" + ) + , "facets=={ 'count':2, " + + " 'f1':{'count':1, 'nj':{'count':1}, 'ny':{'count':0}}" + + ",'f2':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" + + ",'f3':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" + + ",'f4':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" + + ",'f5':{'count':1, 'nj':{'count':1}, 'ny':{'count':0}}" + + "}" + ); + + - // TODO: - // numdocs('query') stat (don't make a bucket... just a count) - // missing(field) - // make missing configurable in min, max, etc - // exclusions - // zeroes - // instead of json.facet make it facet? }