From ecc3a5a78673664aa5d9f7900ae5b1d4cb0bc704 Mon Sep 17 00:00:00 2001
From: Yonik Seeley <yonik@apache.org>
Date: Wed, 29 Apr 2015 13:28:33 +0000
Subject: [PATCH] SOLR-7477: implement facet excludeTags

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1676743 13f79535-47bb-0310-9956-ffa450edef68
---
 solr/CHANGES.txt                              |   8 +
 .../apache/solr/search/facet/FacetField.java  |  16 +-
 .../apache/solr/search/facet/FacetQuery.java  |   1 +
 .../apache/solr/search/facet/FacetRange.java  |  14 +-
 .../solr/search/facet/FacetRequest.java       | 157 ++++++++++++++----
 .../solr/search/facet/TestJsonFacets.java     |  31 +++-
 6 files changed, 179 insertions(+), 48 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index c0e6e241513..41e0111ff3d 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -153,6 +153,14 @@ New Features
 
 * SOLR-7437: Make HDFS transaction log replication factor configurable. (Mark Miller)
 
+* SOLR-7477: Multi-select faceting support for the Facet Module via the "excludeTags"
+  parameter which disregards any matching tagged filters for that facet.  Example:
+  & q=shoes
+  & fq={!tag=COLOR}color:blue
+  & json.facet={ colors:{type:terms, field:color, excludeTags:COLOR} }
+  (yonik)
+
+
 Bug Fixes
 ----------------------
 
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
index b908ab2705b..cd2d0d0f37c 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
@@ -194,6 +194,7 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
 
   @Override
   public void process() throws IOException {
+    super.process();
     sf = fcontext.searcher.getSchema().getField(freq.field);
     response = getFieldCacheCounts();
   }
@@ -340,10 +341,9 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
 
       // handle sub-facets for this bucket
       if (freq.getSubFacets().size() > 0) {
-        FacetContext subContext = fcontext.sub();
-        subContext.base = fcontext.searcher.getDocSet(new TermQuery(new Term(sf.getName(), br.clone())), fcontext.base);
+        TermQuery filter = new TermQuery(new Term(sf.getName(), br.clone()));
         try {
-          fillBucketSubs(bucket, subContext);
+          processSubs(bucket, filter, fcontext.searcher.getDocSet(filter, fcontext.base) );
         } finally {
           // subContext.base.decref();  // OFF-HEAP
           // subContext.base = null;  // do not modify context after creation... there may be deferred execution (i.e. streaming)
@@ -368,13 +368,11 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
         }
 
         if (freq.getSubFacets().size() > 0) {
-          FacetContext subContext = fcontext.sub();
           // TODO: we can do better than this!
           if (missingDocSet == null) {
             missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
           }
-          subContext.base = missingDocSet;
-          fillBucketSubs(missingBucket, subContext);
+          processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet);
         }
 
         res.add("missing", missingBucket);
@@ -542,6 +540,8 @@ class FacetFieldProcessorStream extends FacetFieldProcessor implements Closeable
 
   @Override
   public void process() throws IOException {
+    super.process();
+
     // We need to keep the fcontext open after processing is done (since we will be streaming in the response writer).
     // But if the connection is broken, we want to clean up.
     // fcontext.base.incref();  // OFF-HEAP
@@ -790,13 +790,15 @@ class FacetFieldProcessorStream extends FacetFieldProcessor implements Closeable
 
         // OK, we have a good bucket to return... first get bucket value before moving to next term
         Object bucketVal = sf.getType().toObject(sf, term);
+        BytesRef termCopy = BytesRef.deepCopyOf(term);
         term = termsEnum.next();
 
         SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
         bucket.add("val", bucketVal);
         addStats(bucket, 0);
         if (hasSubFacets) {
-          processSubs(bucket, termSet);
+          TermQuery filter = new TermQuery(new Term(freq.field, termCopy));
+          processSubs(bucket, filter, termSet);
         }
 
         // TODO... termSet needs to stick around for streaming sub-facets?
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java
index 95640b27505..0e25947133d 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java
@@ -52,6 +52,7 @@ class FacetQueryProcessor extends FacetProcessor<FacetQuery> {
 
   @Override
   public void process() throws IOException {
+    super.process();
     response = new SimpleOrderedMap<>();
     fillBucket(response, freq.q);
   }
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
index 24c607a3f34..74be2b8c880 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
@@ -70,6 +70,8 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
 
   @Override
   public void process() throws IOException {
+    super.process();
+
     // Under the normal mincount=0, each shard will need to return 0 counts since we don't calculate buckets at the top level.
     // But if mincount>0 then our sub mincount can be set to 1.
 
@@ -223,6 +225,9 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
 
     int slotCount = rangeList.size() + otherList.size();
     intersections = new DocSet[slotCount];
+    filters = new Query[slotCount];
+
+
     createAccs(fcontext.base.size(), slotCount);
     prepareForCollection();
 
@@ -261,12 +266,14 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
     return res;
   }
 
+  private Query[] filters;
   private DocSet[] intersections;
   private void rangeStats(Range range, int slot) throws IOException {
     Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
     // TODO: specialize count only
     DocSet intersection = fcontext.searcher.getDocSet(rangeQ, fcontext.base);
-    intersections[slot] = intersection;  // save for later
+    filters[slot] = rangeQ;
+    intersections[slot] = intersection;  // save for later  // TODO: only save if number of slots is small enough?
     int num = collect(intersection, slot);
     countAcc.incrementCount(slot, num); // TODO: roll this into collect()
   }
@@ -275,11 +282,8 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
     // handle sub-facets for this bucket
     if (freq.getSubFacets().size() > 0) {
       DocSet subBase = intersections[slot];
-      if (subBase.size() == 0) return;
-      FacetContext subContext = fcontext.sub();
-      subContext.base = subBase;
       try {
-        fillBucketSubs(bucket, subContext);
+        processSubs(bucket, filters[slot], subBase);
       } finally {
         // subContext.base.decref();  // OFF-HEAP
         // subContext.base = null;  // do not modify context after creation... there may be deferred execution (i.e. streaming)
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
index 51d81d99e72..288d1a82d37 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
@@ -19,19 +19,26 @@ package org.apache.solr.search.facet;
 
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
 import java.util.EnumSet;
+import java.util.IdentityHashMap;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.FacetParams;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.handler.component.ResponseBuilder;
 import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrRequestInfo;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocIterator;
@@ -47,7 +54,7 @@ import org.apache.solr.search.SyntaxError;
 public abstract class FacetRequest {
   protected Map<String,AggValueSource> facetStats;  // per-bucket statistics
   protected Map<String,FacetRequest> subFacets;     // list of facets
-  protected List<String> excludeFilters;
+  protected List<String> excludeTags;
   protected boolean processEmpty;
 
   public FacetRequest() {
@@ -84,6 +91,7 @@ class FacetContext {
   QueryContext qcontext;
   SolrQueryRequest req;  // TODO: replace with params?
   SolrIndexSearcher searcher;
+  Query filter;  // TODO: keep track of as a DocSet or as a Query?
   DocSet base;
   FacetContext parent;
   int flags;
@@ -92,15 +100,22 @@ class FacetContext {
     return (flags & IS_SHARD) != 0;
   }
 
-  public FacetContext sub() {
+  /**
+   * @param filter The filter for the bucket that resulted in this context/domain.  Can be null if this is the root context.
+   * @param domain The resulting set of documents for this facet.
+   */
+  public FacetContext sub(Query filter, DocSet domain) {
     FacetContext ctx = new FacetContext();
+    ctx.parent = this;
+    ctx.base = domain;
+    ctx.filter = filter;
+
+    // carry over from parent
     ctx.flags = flags;
     ctx.qcontext = qcontext;
     ctx.req = req;
     ctx.searcher = searcher;
-    ctx.base = base;
 
-    ctx.parent = this;
     return ctx;
   }
 }
@@ -121,10 +136,69 @@ class FacetProcessor<FacetRequestT extends FacetRequest>  {
   }
 
   public void process() throws IOException {
-
-
+    handleDomainChanges();
   }
 
+  protected void handleDomainChanges() throws IOException {
+    if (freq.excludeTags == null || freq.excludeTags.size() == 0) {
+      return;
+    }
+
+    // TODO: somehow remove responsebuilder dependency
+    ResponseBuilder rb = SolrRequestInfo.getRequestInfo().getResponseBuilder();
+    Map tagMap = (Map) rb.req.getContext().get("tags");
+    if (tagMap == null) {
+      // no filters were tagged
+      return;
+    }
+
+    IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<>();
+    for (String excludeTag : freq.excludeTags) {
+      Object olst = tagMap.get(excludeTag);
+      // tagMap has entries of Map<String,Collection<QParser>>, but subject to change in the future
+      if (!(olst instanceof Collection)) continue;
+      for (Object o : (Collection<?>)olst) {
+        if (!(o instanceof QParser)) continue;
+        QParser qp = (QParser)o;
+        try {
+          excludeSet.put(qp.getQuery(), Boolean.TRUE);
+        } catch (SyntaxError syntaxError) {
+          // This should not happen since we should only be retrieving a previously parsed query
+          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
+        }
+      }
+    }
+    if (excludeSet.size() == 0) return;
+
+    List<Query> qlist = new ArrayList<>();
+
+    // add the base query
+    if (!excludeSet.containsKey(rb.getQuery())) {
+      qlist.add(rb.getQuery());
+    }
+
+    // add the filters
+    if (rb.getFilters() != null) {
+      for (Query q : rb.getFilters()) {
+        if (!excludeSet.containsKey(q)) {
+          qlist.add(q);
+        }
+      }
+    }
+
+    // now walk back up the context tree
+    // TODO: we lose parent exclusions...
+    for (FacetContext curr = fcontext; curr != null; curr = curr.parent) {
+      if (curr.filter != null) {
+        qlist.add( curr.filter );
+      }
+    }
+
+    // recompute the base domain
+    fcontext.base = fcontext.searcher.getDocSet(qlist);
+  }
+
+
   public Object getResponse() {
     return null;
   }
@@ -171,8 +245,19 @@ class FacetProcessor<FacetRequestT extends FacetRequest>  {
   }
 
 
-  protected void fillBucketSubs(SimpleOrderedMap<Object> response, FacetContext subContext) throws IOException {
+  protected void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain) throws IOException {
+
+    // TODO: what if a zero bucket has a sub-facet with an exclusion that would yield results?
+    // should we check for domain-altering exclusions, or even ask the sub-facet for
+    // its domain and then only skip it if it's 0?
+
+    if (domain == null || domain.size() == 0 && !freq.processEmpty) {
+      return;
+    }
+
     for (Map.Entry<String,FacetRequest> sub : freq.getSubFacets().entrySet()) {
+      // make a new context for each sub-facet since they can change the domain
+      FacetContext subContext = fcontext.sub(filter, domain);
       FacetProcessor subProcessor = sub.getValue().createFacetProcessor(subContext);
       subProcessor.process();
       response.add( sub.getKey(), subProcessor.getResponse() );
@@ -235,9 +320,6 @@ class FacetProcessor<FacetRequestT extends FacetRequest>  {
   }
 
 
-
-
-
   public void fillBucket(SimpleOrderedMap<Object> bucket, Query q) throws IOException {
     boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;
 
@@ -264,7 +346,7 @@ class FacetProcessor<FacetRequestT extends FacetRequest>  {
 
     try {
       processStats(bucket, result, (int) count);
-      processSubs(bucket, result);
+      processSubs(bucket, q, result);
     } finally {
       if (result != null) {
         // result.decref(); // OFF-HEAP
@@ -273,23 +355,6 @@ class FacetProcessor<FacetRequestT extends FacetRequest>  {
     }
   }
 
-
-
-
-  protected void processSubs(SimpleOrderedMap<Object> bucket, DocSet result) throws IOException {
-    // TODO: process exclusions, etc
-
-    if (result == null || result.size() == 0 && !freq.processEmpty) {
-      return;
-    }
-
-    FacetContext subContext = fcontext.sub();
-    subContext.base = result;
-
-    fillBucketSubs(bucket, subContext);
-  }
-
-
   public static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
     SchemaField sf = searcher.getSchema().getField(fieldName);
     DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false));
@@ -298,6 +363,14 @@ class FacetProcessor<FacetRequestT extends FacetRequest>  {
     return answer;
   }
 
+  public static Query getFieldMissingQuery(SolrIndexSearcher searcher, String fieldName) throws IOException {
+    SchemaField sf = searcher.getSchema().getField(fieldName);
+    Query hasVal = sf.getType().getRangeQuery(null, sf, null, null, false, false);
+    BooleanQuery noVal = new BooleanQuery();
+    noVal.add(hasVal, BooleanClause.Occur.MUST_NOT);
+    return noVal;
+  }
+
 }
 
 
@@ -450,6 +523,14 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
   }
 
 
+  protected void parseCommonParams(Object o) {
+    if (o instanceof Map) {
+      Map<String,Object> m = (Map<String,Object>)o;
+      facet.excludeTags = getStringList(m, "excludeTags");
+    }
+  }
+
+
   public String getField(Map<String,Object> args) {
     Object fieldName = args.get("field"); // TODO: pull out into defined constant
     if (fieldName == null) {
@@ -520,6 +601,20 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
     return (String)o;
   }
 
+  public List<String> getStringList(Map<String,Object> args, String paramName) {
+    Object o = args.get(paramName);
+    if (o == null) {
+      return null;
+    }
+    if (o instanceof List) {
+      return (List<String>)o;
+    }
+    if (o instanceof String) {
+      return StrUtils.splitSmart((String)o, ",", true);
+    }
+
+    throw err("Expected list of string or comma separated string values.");
+  }
 
   public IndexSchema getSchema() {
     return parent.getSchema();
@@ -566,6 +661,8 @@ class FacetQueryParser extends FacetParser<FacetQuery> {
 
   @Override
   public FacetQuery parse(Object arg) throws SyntaxError {
+    parseCommonParams(arg);
+
     String qstring = null;
     if (arg instanceof String) {
       // just the field name...
@@ -601,7 +698,7 @@ class FacetFieldParser extends FacetParser<FacetField> {
   }
 
   public FacetField parse(Object arg) throws SyntaxError {
-
+    parseCommonParams(arg);
     if (arg instanceof String) {
       // just the field name...
       facet.field = (String)arg;
@@ -674,6 +771,8 @@ class FacetRangeParser extends FacetParser<FacetRange> {
   }
 
   public FacetRange parse(Object arg) throws SyntaxError {
+    parseCommonParams(arg);
+
     if (!(arg instanceof Map)) {
       throw err("Missing range facet arguments");
     }
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index 1d17ef5843c..e444c23bffa 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -818,15 +818,32 @@ public class TestJsonFacets extends SolrTestCaseHS {
     );
 
 
+    ////////////////////////////////////////////////////////////////////////////////////////////
+    // multi-select / exclude tagged filters via excludeTags
+    ////////////////////////////////////////////////////////////////////////////////////////////
+
+    // nested query facets on subset
+    client.testJQ(params(p, "q", "*:*", "fq","{!tag=abc}id:(2 3)"
+            , "json.facet", "{ " +
+                " f1:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz,qaz]}}" +
+                ",f2:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:abc }}" +
+                ",f3:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:'xyz,abc,qaz' }}" +
+                ",f4:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz , abc , qaz] }}" +
+                ",f5:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz,qaz]}}" +    // this is repeated, but it did fail when a single context was shared among sub-facets
+                "}"
+        )
+        , "facets=={ 'count':2, " +
+            " 'f1':{'count':1, 'nj':{'count':1}, 'ny':{'count':0}}" +
+            ",'f2':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" +
+            ",'f3':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" +
+            ",'f4':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" +
+            ",'f5':{'count':1, 'nj':{'count':1}, 'ny':{'count':0}}" +
+            "}"
+    );
+
+
 
 
-    // TODO:
-    // numdocs('query') stat (don't make a bucket... just a count)
-    // missing(field)
-    // make missing configurable in min, max, etc
-    // exclusions
-    // zeroes
-    // instead of json.facet make it facet?
   }