SOLR-7477: implement facet excludeTags

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1676743 13f79535-47bb-0310-9956-ffa450edef68
2015-04-29 13:28:33 +00:00 · 2015-04-29 13:28:33 +00:00 · ecc3a5a786
parent 7d48921099
commit ecc3a5a786
6 changed files with 179 additions and 48 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -153,6 +153,14 @@ New Features

 * SOLR-7437: Make HDFS transaction log replication factor configurable. (Mark Miller)

+* SOLR-7477: Multi-select faceting support for the Facet Module via the "excludeTags"
+  parameter which disregards any matching tagged filters for that facet.  Example:
+  & q=shoes
+  & fq={!tag=COLOR}color:blue
+  & json.facet={ colors:{type:terms, field:color, excludeTags=COLOR} } 
+  (yonik)
+
+
 Bug Fixes
 ----------------------

--- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
@ -194,6 +194,7 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {

  @Override
  public void process() throws IOException {
+    super.process();
    sf = fcontext.searcher.getSchema().getField(freq.field);
    response = getFieldCacheCounts();
  }
@ -340,10 +341,9 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {

      // handle sub-facets for this bucket
      if (freq.getSubFacets().size() > 0) {
-        FacetContext subContext = fcontext.sub();
-        subContext.base = fcontext.searcher.getDocSet(new TermQuery(new Term(sf.getName(), br.clone())), fcontext.base);
+        TermQuery filter = new TermQuery(new Term(sf.getName(), br.clone()));
        try {
-          fillBucketSubs(bucket, subContext);
+          processSubs(bucket, filter, fcontext.searcher.getDocSet(filter, fcontext.base) );
        } finally {
          // subContext.base.decref();  // OFF-HEAP
          // subContext.base = null;  // do not modify context after creation... there may be deferred execution (i.e. streaming)
@ -368,13 +368,11 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
        }

        if (freq.getSubFacets().size() > 0) {
-          FacetContext subContext = fcontext.sub();
          // TODO: we can do better than this!
          if (missingDocSet == null) {
            missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
          }
-          subContext.base = missingDocSet;
-          fillBucketSubs(missingBucket, subContext);
+          processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet);
        }

        res.add("missing", missingBucket);
@ -542,6 +540,8 @@ class FacetFieldProcessorStream extends FacetFieldProcessor implements Closeable

  @Override
  public void process() throws IOException {
+    super.process();
+
    // We need to keep the fcontext open after processing is done (since we will be streaming in the response writer).
    // But if the connection is broken, we want to clean up.
    // fcontext.base.incref();  // OFF-HEAP
@ -790,13 +790,15 @@ class FacetFieldProcessorStream extends FacetFieldProcessor implements Closeable

        // OK, we have a good bucket to return... first get bucket value before moving to next term
        Object bucketVal = sf.getType().toObject(sf, term);
+        BytesRef termCopy = BytesRef.deepCopyOf(term);
        term = termsEnum.next();

        SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
        bucket.add("val", bucketVal);
        addStats(bucket, 0);
        if (hasSubFacets) {
-          processSubs(bucket, termSet);
+          TermQuery filter = new TermQuery(new Term(freq.field, termCopy));
+          processSubs(bucket, filter, termSet);
        }

        // TODO... termSet needs to stick around for streaming sub-facets?
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java
@ -52,6 +52,7 @@ class FacetQueryProcessor extends FacetProcessor<FacetQuery> {

  @Override
  public void process() throws IOException {
+    super.process();
    response = new SimpleOrderedMap<>();
    fillBucket(response, freq.q);
  }
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
@ -70,6 +70,8 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {

  @Override
  public void process() throws IOException {
+    super.process();
+
    // Under the normal mincount=0, each shard will need to return 0 counts since we don't calculate buckets at the top level.
    // But if mincount>0 then our sub mincount can be set to 1.

@ -223,6 +225,9 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {

    int slotCount = rangeList.size() + otherList.size();
    intersections = new DocSet[slotCount];
+    filters = new Query[slotCount];
+
+
    createAccs(fcontext.base.size(), slotCount);
    prepareForCollection();

@ -261,12 +266,14 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
    return res;
  }

+  private Query[] filters;
  private DocSet[] intersections;
  private void rangeStats(Range range, int slot) throws IOException {
    Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
    // TODO: specialize count only
    DocSet intersection = fcontext.searcher.getDocSet(rangeQ, fcontext.base);
-    intersections[slot] = intersection;  // save for later
+    filters[slot] = rangeQ;
+    intersections[slot] = intersection;  // save for later  // TODO: only save if number of slots is small enough?
    int num = collect(intersection, slot);
    countAcc.incrementCount(slot, num); // TODO: roll this into collect()
  }
@ -275,11 +282,8 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
    // handle sub-facets for this bucket
    if (freq.getSubFacets().size() > 0) {
      DocSet subBase = intersections[slot];
-      if (subBase.size() == 0) return;
-      FacetContext subContext = fcontext.sub();
-      subContext.base = subBase;
      try {
-        fillBucketSubs(bucket, subContext);
+        processSubs(bucket, filters[slot], subBase);
      } finally {
        // subContext.base.decref();  // OFF-HEAP
        // subContext.base = null;  // do not modify context after creation... there may be deferred execution (i.e. streaming)
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
@ -19,19 +19,26 @@ package org.apache.solr.search.facet;


 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
 import java.util.EnumSet;
+import java.util.IdentityHashMap;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;

 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.FacetParams;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.handler.component.ResponseBuilder;
 import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrRequestInfo;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocIterator;
@ -47,7 +54,7 @@ import org.apache.solr.search.SyntaxError;
 public abstract class FacetRequest {
  protected Map<String,AggValueSource> facetStats;  // per-bucket statistics
  protected Map<String,FacetRequest> subFacets;     // list of facets
-  protected List<String> excludeFilters;
+  protected List<String> excludeTags;
  protected boolean processEmpty;

  public FacetRequest() {
@ -84,6 +91,7 @@ class FacetContext {
  QueryContext qcontext;
  SolrQueryRequest req;  // TODO: replace with params?
  SolrIndexSearcher searcher;
+  Query filter;  // TODO: keep track of as a DocSet or as a Query?
  DocSet base;
  FacetContext parent;
  int flags;
@ -92,15 +100,22 @@ class FacetContext {
    return (flags & IS_SHARD) != 0;
  }

-  public FacetContext sub() {
+  /**
+   * @param filter The filter for the bucket that resulted in this context/domain.  Can be null if this is the root context.
+   * @param domain The resulting set of documents for this facet.
+   */
+  public FacetContext sub(Query filter, DocSet domain) {
    FacetContext ctx = new FacetContext();
+    ctx.parent = this;
+    ctx.base = domain;
+    ctx.filter = filter;
+
+    // carry over from parent
    ctx.flags = flags;
    ctx.qcontext = qcontext;
    ctx.req = req;
    ctx.searcher = searcher;
-    ctx.base = base;

-    ctx.parent = this;
    return ctx;
  }
 }
@ -121,10 +136,69 @@ class FacetProcessor<FacetRequestT extends FacetRequest>  {
  }

  public void process() throws IOException {
-
-
+    handleDomainChanges();
  }

+  protected void handleDomainChanges() throws IOException {
+    if (freq.excludeTags == null || freq.excludeTags.size() == 0) {
+      return;
+    }
+
+    // TODO: somehow remove responsebuilder dependency
+    ResponseBuilder rb = SolrRequestInfo.getRequestInfo().getResponseBuilder();
+    Map tagMap = (Map) rb.req.getContext().get("tags");
+    if (tagMap == null) {
+      // no filters were tagged
+      return;
+    }
+
+    IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<>();
+    for (String excludeTag : freq.excludeTags) {
+      Object olst = tagMap.get(excludeTag);
+      // tagMap has entries of List<String,List<QParser>>, but subject to change in the future
+      if (!(olst instanceof Collection)) continue;
+      for (Object o : (Collection<?>)olst) {
+        if (!(o instanceof QParser)) continue;
+        QParser qp = (QParser)o;
+        try {
+          excludeSet.put(qp.getQuery(), Boolean.TRUE);
+        } catch (SyntaxError syntaxError) {
+          // This should not happen since we should only be retrieving a previously parsed query
+          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
+        }
+      }
+    }
+    if (excludeSet.size() == 0) return;
+
+    List<Query> qlist = new ArrayList<>();
+
+    // add the base query
+    if (!excludeSet.containsKey(rb.getQuery())) {
+      qlist.add(rb.getQuery());
+    }
+
+    // add the filters
+    if (rb.getFilters() != null) {
+      for (Query q : rb.getFilters()) {
+        if (!excludeSet.containsKey(q)) {
+          qlist.add(q);
+        }
+      }
+    }
+
+    // now walk back up the context tree
+    // TODO: we lose parent exclusions...
+    for (FacetContext curr = fcontext; curr != null; curr = curr.parent) {
+      if (curr.filter != null) {
+        qlist.add( curr.filter );
+      }
+    }
+
+    // recompute the base domain
+    fcontext.base = fcontext.searcher.getDocSet(qlist);
+  }
+
+
  public Object getResponse() {
    return null;
  }
@ -171,8 +245,19 @@ class FacetProcessor<FacetRequestT extends FacetRequest>  {
  }


-  protected void fillBucketSubs(SimpleOrderedMap<Object> response, FacetContext subContext) throws IOException {
+  protected void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain) throws IOException {
+
+    // TODO: what if a zero bucket has a sub-facet with an exclusion that would yield results?
+    // should we check for domain-altering exclusions, or even ask the sub-facet for
+    // it's domain and then only skip it if it's 0?
+
+    if (domain == null || domain.size() == 0 && !freq.processEmpty) {
+      return;
+    }
+
    for (Map.Entry<String,FacetRequest> sub : freq.getSubFacets().entrySet()) {
+      // make a new context for each sub-facet since they can change the domain
+      FacetContext subContext = fcontext.sub(filter, domain);
      FacetProcessor subProcessor = sub.getValue().createFacetProcessor(subContext);
      subProcessor.process();
      response.add( sub.getKey(), subProcessor.getResponse() );
@ -235,9 +320,6 @@ class FacetProcessor<FacetRequestT extends FacetRequest>  {
  }


-
-
-
  public void fillBucket(SimpleOrderedMap<Object> bucket, Query q) throws IOException {
    boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;

@ -264,7 +346,7 @@ class FacetProcessor<FacetRequestT extends FacetRequest>  {

    try {
      processStats(bucket, result, (int) count);
-      processSubs(bucket, result);
+      processSubs(bucket, q, result);
    } finally {
      if (result != null) {
        // result.decref(); // OFF-HEAP
@ -273,23 +355,6 @@ class FacetProcessor<FacetRequestT extends FacetRequest>  {
    }
  }

-
-
-
-  protected void processSubs(SimpleOrderedMap<Object> bucket, DocSet result) throws IOException {
-    // TODO: process exclusions, etc
-
-    if (result == null || result.size() == 0 && !freq.processEmpty) {
-      return;
-    }
-
-    FacetContext subContext = fcontext.sub();
-    subContext.base = result;
-
-    fillBucketSubs(bucket, subContext);
-  }
-
-
  public static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
    SchemaField sf = searcher.getSchema().getField(fieldName);
    DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false));
@ -298,6 +363,14 @@ class FacetProcessor<FacetRequestT extends FacetRequest>  {
    return answer;
  }

+  public static Query getFieldMissingQuery(SolrIndexSearcher searcher, String fieldName) throws IOException {
+    SchemaField sf = searcher.getSchema().getField(fieldName);
+    Query hasVal = sf.getType().getRangeQuery(null, sf, null, null, false, false);
+    BooleanQuery noVal = new BooleanQuery();
+    noVal.add(hasVal, BooleanClause.Occur.MUST_NOT);
+    return noVal;
+  }
+
 }


@ -450,6 +523,14 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
  }


+  protected void parseCommonParams(Object o) {
+    if (o instanceof Map) {
+      Map<String,Object> m = (Map<String,Object>)o;
+      facet.excludeTags = getStringList(m, "excludeTags");
+    }
+  }
+
+
  public String getField(Map<String,Object> args) {
    Object fieldName = args.get("field"); // TODO: pull out into defined constant
    if (fieldName == null) {
@ -520,6 +601,20 @@ abstract class FacetParser<FacetRequestT extends FacetRequest> {
    return (String)o;
  }

+  public List<String> getStringList(Map<String,Object> args, String paramName) {
+    Object o = args.get(paramName);
+    if (o == null) {
+      return null;
+    }
+    if (o instanceof List) {
+      return (List<String>)o;
+    }
+    if (o instanceof String) {
+      return StrUtils.splitSmart((String)o, ",", true);
+    }
+
+    throw err("Expected list of string or comma separated string values.");
+  }

  public IndexSchema getSchema() {
    return parent.getSchema();
@ -566,6 +661,8 @@ class FacetQueryParser extends FacetParser<FacetQuery> {

  @Override
  public FacetQuery parse(Object arg) throws SyntaxError {
+    parseCommonParams(arg);
+
    String qstring = null;
    if (arg instanceof String) {
      // just the field name...
@ -601,7 +698,7 @@ class FacetFieldParser extends FacetParser<FacetField> {
  }

  public FacetField parse(Object arg) throws SyntaxError {
-
+    parseCommonParams(arg);
    if (arg instanceof String) {
      // just the field name...
      facet.field = (String)arg;
@ -674,6 +771,8 @@ class FacetRangeParser extends FacetParser<FacetRange> {
  }

  public FacetRange parse(Object arg) throws SyntaxError {
+    parseCommonParams(arg);
+
    if (!(arg instanceof Map)) {
      throw err("Missing range facet arguments");
    }
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@ -818,15 +818,32 @@ public class TestJsonFacets extends SolrTestCaseHS {
    );


+    ////////////////////////////////////////////////////////////////////////////////////////////
+    // multi-select / exclude tagged filters via excludeTags
+    ////////////////////////////////////////////////////////////////////////////////////////////
+
+    // nested query facets on subset
+    client.testJQ(params(p, "q", "*:*", "fq","{!tag=abc}id:(2 3)"
+            , "json.facet", "{ " +
+                " f1:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz,qaz]}}" +
+                ",f2:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:abc }}" +
+                ",f3:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:'xyz,abc,qaz' }}" +
+                ",f4:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz , abc , qaz] }}" +
+                ",f5:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz,qaz]}}" +    // this is repeated, but it did fail when a single context was shared among sub-facets
+                "}"
+        )
+        , "facets=={ 'count':2, " +
+            " 'f1':{'count':1, 'nj':{'count':1}, 'ny':{'count':0}}" +
+            ",'f2':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" +
+            ",'f3':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" +
+            ",'f4':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" +
+            ",'f5':{'count':1, 'nj':{'count':1}, 'ny':{'count':0}}" +
+            "}"
+    );
+
+


-    // TODO:
-    // numdocs('query') stat (don't make a bucket... just a count)
-    // missing(field)
-    // make missing configurable in min, max, etc
-    // exclusions
-    // zeroes
-    // instead of json.facet make it facet?
  }