SOLR-9432: json facet refinement progress, test refinement info going to shards

2016-10-11 17:13:36 -04:00 · 2016-10-11 17:13:36 -04:00 · 0f08ad9ad3
parent 7660dde458
commit 0f08ad9ad3
4 changed files with 426 additions and 172 deletions
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetBucket.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetBucket.java
@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search.facet;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.solr.common.util.SimpleOrderedMap;
+
+public class FacetBucket {
+  final FacetBucketMerger parent;
+  final Comparable bucketValue;
+  final int bucketNumber;  // this is just for internal correlation (the first bucket created is bucket 0, the next bucket 1, across all field buckets)
+
+  long count;
+  Map<String, FacetMerger> subs;
+
+  public FacetBucket(FacetBucketMerger parent, Comparable bucketValue, FacetMerger.Context mcontext) {
+    this.parent = parent;
+    this.bucketValue = bucketValue;
+    this.bucketNumber = mcontext.getNewBucketNumber(); // TODO: we don't need bucket numbers for all buckets...
+  }
+
+  public long getCount() {
+    return count;
+  }
+
+  /** returns the existing merger for the given key, or null if none yet exists */
+  FacetMerger getExistingMerger(String key) {
+    if (subs == null) return null;
+    return subs.get(key);
+  }
+
+  private FacetMerger getMerger(String key, Object prototype) {
+    FacetMerger merger = null;
+    if (subs != null) {
+      merger = subs.get(key);
+      if (merger != null) return merger;
+    }
+
+    merger = parent.createFacetMerger(key, prototype);
+
+    if (merger != null) {
+      if (subs == null) {
+        subs = new HashMap<>();
+      }
+      subs.put(key, merger);
+    }
+
+    return merger;
+  }
+
+  public void mergeBucket(SimpleOrderedMap bucket, FacetMerger.Context mcontext) {
+    // todo: for refinements, we want to recurse, but not re-do stats for intermediate buckets
+
+    mcontext.setShardFlag(bucketNumber);
+
+    // drive merging off the received bucket?
+    for (int i=0; i<bucket.size(); i++) {
+      String key = bucket.getName(i);
+      Object val = bucket.getVal(i);
+      if ("count".equals(key)) {
+        count += ((Number)val).longValue();
+        continue;
+      }
+      if ("val".equals(key)) {
+        // this is taken care of at a higher level...
+        continue;
+      }
+
+      FacetMerger merger = getMerger(key, val);
+
+      if (merger != null) {
+        merger.merge( val , mcontext );
+      }
+    }
+  }
+
+
+  public SimpleOrderedMap getMergedBucket() {
+    SimpleOrderedMap out = new SimpleOrderedMap( (subs == null ? 0 : subs.size()) + 2 );
+    if (bucketValue != null) {
+      out.add("val", bucketValue);
+    }
+    out.add("count", count);
+    if (subs != null) {
+      for (Map.Entry<String,FacetMerger> mergerEntry : subs.entrySet()) {
+        FacetMerger subMerger = mergerEntry.getValue();
+        out.add(mergerEntry.getKey(), subMerger.getMergedResult());
+      }
+    }
+
+    return out;
+  }
+
+  public Map<String, Object> getRefinement(FacetMerger.Context mcontext, Collection<String> refineTags) {
+    if (subs == null) {
+      return null;
+    }
+    Map<String,Object> refinement = null;
+    for (String tag : refineTags) {
+      FacetMerger subMerger = subs.get(tag);
+      if (subMerger != null) {
+        Map<String,Object> subRef = subMerger.getRefinement(mcontext);
+        if (subRef != null) {
+          if (refinement == null) {
+            refinement = new HashMap<>(refineTags.size());
+          }
+          refinement.put(tag, subRef);
+        }
+      }
+    }
+    return refinement;
+  }
+
+  public Map<String, Object> getRefinement2(FacetMerger.Context mcontext, Collection<String> refineTags) {
+    // TODO - partial results should turn off refining!!!
+
+    boolean parentMissing = mcontext.bucketWasMissing();
+
+    // TODO: this is a redundant check for many types of facets... only do on field faceting
+    if (!parentMissing) {
+      // if parent bucket wasn't missing, check if this bucket was.
+      // this really only needs checking on certain buckets... (like terms facet)
+      boolean sawThisBucket = mcontext.getShardFlag(bucketNumber);
+      if (!sawThisBucket) {
+        mcontext.setBucketWasMissing(true);
+      }
+    } else {
+      // if parent bucket was missing, then we should be too
+      assert !mcontext.getShardFlag(bucketNumber);
+    }
+
+    Map<String,Object> refinement = null;
+
+    if (!mcontext.bucketWasMissing()) {
+      // this is just a pass-through bucket... see if there is anything to do at all
+      if (subs == null || refineTags.isEmpty()) {
+        return null;
+      }
+    } else {
+      // for missing bucket, go over all sub-facts
+      refineTags = null;
+      refinement = new HashMap<>(4);
+      if (bucketValue != null) {
+        refinement.put("_v", bucketValue);
+      }
+      refinement.put("_m",1);
+    }
+
+    // TODO: listing things like sub-facets that have no field facets are redundant
+    // (we only need facet that have variable values)
+
+    for (Map.Entry<String,FacetMerger> sub : subs.entrySet()) {
+      if (refineTags != null && !refineTags.contains(sub.getKey())) {
+        continue;
+      }
+      Map<String,Object> subRef = sub.getValue().getRefinement(mcontext);
+      if (subRef != null) {
+        if (refinement == null) {
+          refinement = new HashMap<>(4);
+        }
+        refinement.put(sub.getKey(), subRef);
+      }
+    }
+
+
+    // reset the "bucketMissing" flag on the way back out.
+    mcontext.setBucketWasMissing(parentMissing);
+    return refinement;
+  }
+
+}
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java
@ -473,169 +473,3 @@ class FacetQueryMerger extends FacetBucketMerger<FacetQuery> {



-class FacetBucket {
-  final FacetBucketMerger parent;
-  final Comparable bucketValue;
-  final int bucketNumber;  // this is just for internal correlation (the first bucket created is bucket 0, the next bucket 1, across all field buckets)
-
-  long count;
-  Map<String, FacetMerger> subs;
-
-  public FacetBucket(FacetBucketMerger parent, Comparable bucketValue, FacetMerger.Context mcontext) {
-    this.parent = parent;
-    this.bucketValue = bucketValue;
-    this.bucketNumber = mcontext.getNewBucketNumber(); // TODO: we don't need bucket numbers for all buckets...
-  }
-
-  public long getCount() {
-    return count;
-  }
-
-  /** returns the existing merger for the given key, or null if none yet exists */
-  FacetMerger getExistingMerger(String key) {
-    if (subs == null) return null;
-    return subs.get(key);
-  }
-
-  private FacetMerger getMerger(String key, Object prototype) {
-    FacetMerger merger = null;
-    if (subs != null) {
-      merger = subs.get(key);
-      if (merger != null) return merger;
-    }
-
-    merger = parent.createFacetMerger(key, prototype);
-
-    if (merger != null) {
-      if (subs == null) {
-        subs = new HashMap<>();
-      }
-      subs.put(key, merger);
-    }
-
-    return merger;
-  }
-
-  public void mergeBucket(SimpleOrderedMap bucket, FacetMerger.Context mcontext) {
-    // todo: for refinements, we want to recurse, but not re-do stats for intermediate buckets
-
-    mcontext.setShardFlag(bucketNumber);
-
-    // drive merging off the received bucket?
-    for (int i=0; i<bucket.size(); i++) {
-      String key = bucket.getName(i);
-      Object val = bucket.getVal(i);
-      if ("count".equals(key)) {
-        count += ((Number)val).longValue();
-        continue;
-      }
-      if ("val".equals(key)) {
-        // this is taken care of at a higher level...
-        continue;
-      }
-
-      FacetMerger merger = getMerger(key, val);
-
-      if (merger != null) {
-        merger.merge( val , mcontext );
-      }
-    }
-  }
-
-
-  public SimpleOrderedMap getMergedBucket() {
-    SimpleOrderedMap out = new SimpleOrderedMap( (subs == null ? 0 : subs.size()) + 2 );
-    if (bucketValue != null) {
-      out.add("val", bucketValue);
-    }
-    out.add("count", count);
-    if (subs != null) {
-      for (Map.Entry<String,FacetMerger> mergerEntry : subs.entrySet()) {
-        FacetMerger subMerger = mergerEntry.getValue();
-        out.add(mergerEntry.getKey(), subMerger.getMergedResult());
-      }
-    }
-
-    return out;
-  }
-
-  public Map<String, Object> getRefinement(FacetMerger.Context mcontext, Collection<String> refineTags) {
-    if (subs == null) {
-      return null;
-    }
-    Map<String,Object> refinement = null;
-    for (String tag : refineTags) {
-      FacetMerger subMerger = subs.get(tag);
-      if (subMerger != null) {
-        Map<String,Object> subRef = subMerger.getRefinement(mcontext);
-        if (subRef != null) {
-          if (refinement == null) {
-            refinement = new HashMap<>(refineTags.size());
-          }
-          refinement.put(tag, subRef);
-        }
-      }
-    }
-    return refinement;
-  }
-
-  public Map<String, Object> getRefinement2(FacetMerger.Context mcontext, Collection<String> refineTags) {
-    // TODO - partial results should turn off refining!!!
-
-    boolean parentMissing = mcontext.bucketWasMissing();
-
-    // TODO: this is a redundant check for many types of facets... only do on field faceting
-    if (!parentMissing) {
-      // if parent bucket wasn't missing, check if this bucket was.
-      // this really only needs checking on certain buckets... (like terms facet)
-      boolean sawThisBucket = mcontext.getShardFlag(bucketNumber);
-      if (!sawThisBucket) {
-        mcontext.setBucketWasMissing(true);
-      }
-    } else {
-      // if parent bucket was missing, then we should be too
-      assert !mcontext.getShardFlag(bucketNumber);
-    }
-
-    Map<String,Object> refinement = null;
-
-    if (!mcontext.bucketWasMissing()) {
-      // this is just a pass-through bucket... see if there is anything to do at all
-      if (subs == null || refineTags.isEmpty()) {
-        return null;
-      }
-    } else {
-      // for missing bucket, go over all sub-facts
-      refineTags = null;
-      refinement = new HashMap<>(4);
-      if (bucketValue != null) {
-        refinement.put("_v", bucketValue);
-      }
-      refinement.put("_m",1);
-    }
-
-    // TODO: listing things like sub-facets that have no field facets are redundant
-    // (we only need facet that have variable values)
-
-    for (Map.Entry<String,FacetMerger> sub : subs.entrySet()) {
-      if (refineTags != null && !refineTags.contains(sub.getKey())) {
-        continue;
-      }
-      Map<String,Object> subRef = sub.getValue().getRefinement(mcontext);
-      if (subRef != null) {
-        if (refinement == null) {
-          refinement = new HashMap<>(4);
-        }
-        refinement.put(sub.getKey(), subRef);
-      }
-    }
-
-
-    // reset the "bucketMissing" flag on the way back out.
-    mcontext.setBucketWasMissing(parentMissing);
-    return refinement;
-  }
-
-}
-
-
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java
@ -18,6 +18,7 @@
 package org.apache.solr.search.facet;

 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
@ -157,9 +158,21 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
    // If we are missing a bucket for this shard, we'll need to get the specific buckets that need refining.
    Collection<String> tagsWithPartial = mcontext.getSubsWithPartial(freq);

-    boolean thisMissing = mcontext.bucketWasMissing();
+    boolean thisMissing = mcontext.bucketWasMissing(); // Was this whole facet missing (i.e. inside a bucket that was missing)?

-    int num = (int)(freq.offset + freq.limit);
+    // TODO: add information in sub-shard response about dropped buckets (i.e. not all returned due to limit)
+    // If we know we've seen all the buckets from a shard, then we don't have to add to leafBuckets or missingBuckets, only skipBuckets
+    boolean isCommandPartial = freq.returnsPartial();
+    boolean returnedAllBuckets = !isCommandPartial && !thisMissing;  // did the shard return all of the possible buckets?
+
+    if (returnedAllBuckets && tags.isEmpty() && tagsWithPartial.isEmpty()) {
+      // this facet returned all possible buckets, and there were no sub-facets with partial results
+      // and sub-facets that require refining
+      return null;
+    }
+
+
+    int num = freq.limit < 0 ? Integer.MAX_VALUE : (int)(freq.offset + freq.limit);
    int numBucketsToCheck = Math.min(buckets.size(), num);

    Collection<FacetBucket> bucketList;
@ -176,8 +189,8 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
    }

    ArrayList<Object> leafBuckets = null;    // "_l" missing buckets specified by bucket value only (no need to specify anything further)
-    ArrayList<Object> missingBuckets = null; // "_m" missing buckets that need to specify values for partial facets
-    ArrayList<Object> skipBuckets = null;    // "_s" present buckets that we need to recurse into because children facets have refinement requirements
+    ArrayList<Object> missingBuckets = null; // "_m" missing buckets that need to specify values for partial facets.. each entry is [bucketval, subs]
+    ArrayList<Object> skipBuckets = null;    // "_s" present buckets that we need to recurse into because children facets have refinement requirements. each entry is [bucketval, subs]

    for (FacetBucket bucket : bucketList) {
      if (numBucketsToCheck-- <= 0) break;
@ -196,7 +209,7 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted

          if (bucketRefinement != null) {
            if (missingBuckets==null) missingBuckets = new ArrayList<>();
-            missingBuckets.add(bucketRefinement);
+            missingBuckets.add( Arrays.asList(bucket.bucketValue, bucketRefinement) );
          }
        }

@ -211,7 +224,7 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
        Map<String,Object> bucketRefinement = bucket.getRefinement(mcontext, tagsWithPartial);
        if (bucketRefinement != null) {
          if (skipBuckets == null) skipBuckets = new ArrayList<>();
-          skipBuckets.add(bucketRefinement);
+          skipBuckets.add( Arrays.asList(bucket.bucketValue, bucketRefinement) );
        }
      }

--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java
@ -0,0 +1,218 @@
+package org.apache.solr.search.facet;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.solr.JSONTestUtil;
+import org.apache.solr.SolrTestCaseHS;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.request.SolrQueryRequest;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.noggit.JSONParser;
+import org.noggit.JSONUtil;
+import org.noggit.ObjectBuilder;
+
+
+public class TestJsonFacetRefinement extends SolrTestCaseHS {
+
+  private static SolrInstances servers;  // for distributed testing
+  private static int origTableSize;
+
+  @BeforeClass
+  public static void beforeTests() throws Exception {
+    JSONTestUtil.failRepeatedKeys = true;
+    initCore("solrconfig-tlog.xml","schema_latest.xml");
+  }
+
+  public static void initServers() throws Exception {
+    if (servers == null) {
+      servers = new SolrInstances(3, "solrconfig-tlog.xml", "schema_latest.xml");
+    }
+  }
+
+  @AfterClass
+  public static void afterTests() throws Exception {
+    JSONTestUtil.failRepeatedKeys = false;
+    if (servers != null) {
+      servers.stop();
+      servers = null;
+    }
+  }
+
+
+  // todo - pull up to test base class?
+  public void matchJSON(String json, double delta, String... tests) throws Exception {
+    for (String test : tests) {
+      if (test == null) {
+        assertNull(json);
+        continue;
+      }
+      if (test.length()==0) continue;
+
+      String err = JSONTestUtil.match(json, test, delta);
+
+      if (err != null) {
+        throw new RuntimeException("JSON failed validation. error=" + err +
+            "\n expected =" + test +
+            "\n got = " + json
+        );
+      }
+    }
+  }
+
+
+  public void match(Object input, double delta, String... tests) throws Exception {
+    for (String test : tests) {
+      String err = null;
+      if (test == null) {
+        if (input != null) {
+          err = "expected null";
+        }
+      } else if (input == null) {
+        err = "got null";
+      } else {
+        err = JSONTestUtil.matchObj(input, test, delta);
+      }
+
+      if (err != null) {
+        throw new RuntimeException("JSON failed validation. error=" + err +
+            "\n expected =" + test +
+            "\n got = " + input
+        );
+      }
+    }
+  }
+
+
+  /** Use SimpleOrderedMap rather than Map to match responses from shards */
+  public static Object fromJSON(String json) throws IOException {
+    JSONParser parser = new JSONParser(json);
+    ObjectBuilder ob = new ObjectBuilder(parser) {
+      @Override
+      public Object newObject() throws IOException {
+        return new SimpleOrderedMap();
+      }
+
+      @Override
+      public void addKeyVal(Object map, Object key, Object val) throws IOException {
+        ((SimpleOrderedMap)map).add(key.toString(), val);
+      }
+    };
+
+    return ob.getObject();
+  }
+
+  void doTestRefine(String facet, String... responsesAndTests) throws Exception {
+    SolrQueryRequest req = req();
+    try {
+      int nShards = responsesAndTests.length / 2;
+      Object jsonFacet = ObjectBuilder.fromJSON(facet);
+      FacetParser parser = new FacetTopParser(req);
+      FacetRequest facetRequest = parser.parse(jsonFacet);
+
+      FacetMerger merger = null;
+      FacetMerger.Context ctx = new FacetMerger.Context(nShards);
+      for (int i=0; i<nShards; i++) {
+        Object response = fromJSON(responsesAndTests[i]);
+        if (i==0) {
+          merger = facetRequest.createFacetMerger(response);
+        }
+        ctx.newShard("s"+i);
+        merger.merge(response, ctx);
+      }
+
+      for (int i=0; i<nShards; i++) {
+        ctx.setShard("s"+i);
+        Object refinement = merger.getRefinement(ctx);
+        String tests = responsesAndTests[nShards+i];
+        match(refinement, 1e-5, tests);
+      }
+
+    } finally {
+      req.close();
+    }
+
+  }
+
+  @Test
+  public void testMerge() throws Exception {
+    doTestRefine("{x : {type:terms, field:X, limit:2, refine:true} }",  // the facet request
+        "{x: {buckets:[{val:x1, count:5}, {val:x2, count:3}] } }",  // shard0 response
+        "{x: {buckets:[{val:x2, count:4}, {val:x3, count:2}] } }",  // shard1 response
+        null,              // shard0 expected refinement info
+        "=={x:{_l:[x1]}}"  // shard1 expected refinement info
+        );
+
+    // same test w/o refinement turned on
+    doTestRefine("{x : {type:terms, field:X, limit:2} }",  // the facet request
+        "{x: {buckets:[{val:x1, count:5}, {val:x2, count:3}] } }",  // shard0 response
+        "{x: {buckets:[{val:x2, count:4}, {val:x3, count:2}] } }",  // shard1 response
+        null, // shard0 expected refinement info
+        null  // shard1 expected refinement info
+    );
+
+    // same test, but nested in query facet
+    doTestRefine("{top:{type:query, q:'foo_s:myquery', facet:{x : {type:terms, field:X, limit:2, refine:true} } } }",  // the facet request
+        "{top: {x: {buckets:[{val:x1, count:5}, {val:x2, count:3}] } } }",  // shard0 response
+        "{top: {x: {buckets:[{val:x2, count:4}, {val:x3, count:2}] } } }",  // shard1 response
+        null,              // shard0 expected refinement info
+        "=={top:{x:{_l:[x1]}}}"  // shard1 expected refinement info
+    );
+
+    // same test w/o refinement turned on
+    doTestRefine("{top:{type:query, q:'foo_s:myquery', facet:{x : {type:terms, field:X, limit:2, refine:false} } } }",
+        "{top: {x: {buckets:[{val:x1, count:5}, {val:x2, count:3}] } } }",  // shard0 response
+        "{top: {x: {buckets:[{val:x2, count:4}, {val:x3, count:2}] } } }",  // shard1 response
+        null,
+        null
+    );
+
+    // same test, but nested in a terms facet
+    doTestRefine("{top:{type:terms, field:Afield, facet:{x : {type:terms, field:X, limit:2, refine:true} } } }",
+        "{top: {buckets:[{val:'A', count:2, x:{buckets:[{val:x1, count:5},{val:x2, count:3}]} } ] } }",
+        "{top: {buckets:[{val:'A', count:1, x:{buckets:[{val:x2, count:4},{val:x3, count:2}]} } ] } }",
+        null,
+        "=={top: {" +
+            "_s:[  ['A' , {x:{_l:[x1]}} ]  ]" +
+            "    }  " +
+            "}"
+    );
+
+    // same test, but nested in range facet
+    doTestRefine("{top:{type:range, field:R, start:0, end:1, gap:1, facet:{x : {type:terms, field:X, limit:2, refine:true} } } }",
+        "{top: {buckets:[{val:0, count:2, x:{buckets:[{val:x1, count:5},{val:x2, count:3}]} } ] } }",
+        "{top: {buckets:[{val:0, count:1, x:{buckets:[{val:x2, count:4},{val:x3, count:2}]} } ] } }",
+        null,
+        "=={top: {" +
+            "_s:[  [0 , {x:{_l:[x1]}} ]  ]" +
+            "    }  " +
+            "}"
+    );
+
+  }
+
+
+
+
+}