From 690a78780ddb2b2a0e925136dac72f93bf1b9aad Mon Sep 17 00:00:00 2001 From: yonik Date: Tue, 11 Oct 2016 17:13:36 -0400 Subject: [PATCH] SOLR-9432: json facet refinement progress, test refinement info going to shards --- .../apache/solr/search/facet/FacetBucket.java | 189 +++++++++++++++ .../apache/solr/search/facet/FacetModule.java | 166 ------------- .../facet/FacetRequestSortedMerger.java | 25 +- .../search/facet/TestJsonFacetRefinement.java | 218 ++++++++++++++++++ 4 files changed, 426 insertions(+), 172 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/search/facet/FacetBucket.java create mode 100644 solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetBucket.java b/solr/core/src/java/org/apache/solr/search/facet/FacetBucket.java new file mode 100644 index 00000000000..ae1eba68488 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetBucket.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search.facet; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +import org.apache.solr.common.util.SimpleOrderedMap; + +public class FacetBucket { + final FacetBucketMerger parent; + final Comparable bucketValue; + final int bucketNumber; // this is just for internal correlation (the first bucket created is bucket 0, the next bucket 1, across all field buckets) + + long count; + Map subs; + + public FacetBucket(FacetBucketMerger parent, Comparable bucketValue, FacetMerger.Context mcontext) { + this.parent = parent; + this.bucketValue = bucketValue; + this.bucketNumber = mcontext.getNewBucketNumber(); // TODO: we don't need bucket numbers for all buckets... + } + + public long getCount() { + return count; + } + + /** returns the existing merger for the given key, or null if none yet exists */ + FacetMerger getExistingMerger(String key) { + if (subs == null) return null; + return subs.get(key); + } + + private FacetMerger getMerger(String key, Object prototype) { + FacetMerger merger = null; + if (subs != null) { + merger = subs.get(key); + if (merger != null) return merger; + } + + merger = parent.createFacetMerger(key, prototype); + + if (merger != null) { + if (subs == null) { + subs = new HashMap<>(); + } + subs.put(key, merger); + } + + return merger; + } + + public void mergeBucket(SimpleOrderedMap bucket, FacetMerger.Context mcontext) { + // todo: for refinements, we want to recurse, but not re-do stats for intermediate buckets + + mcontext.setShardFlag(bucketNumber); + + // drive merging off the received bucket? + for (int i=0; i mergerEntry : subs.entrySet()) { + FacetMerger subMerger = mergerEntry.getValue(); + out.add(mergerEntry.getKey(), subMerger.getMergedResult()); + } + } + + return out; + } + + public Map getRefinement(FacetMerger.Context mcontext, Collection refineTags) { + if (subs == null) { + return null; + } + Map refinement = null; + for (String tag : refineTags) { + FacetMerger subMerger = subs.get(tag); + if (subMerger != null) { + Map subRef = subMerger.getRefinement(mcontext); + if (subRef != null) { + if (refinement == null) { + refinement = new HashMap<>(refineTags.size()); + } + refinement.put(tag, subRef); + } + } + } + return refinement; + } + + public Map getRefinement2(FacetMerger.Context mcontext, Collection refineTags) { + // TODO - partial results should turn off refining!!! + + boolean parentMissing = mcontext.bucketWasMissing(); + + // TODO: this is a redundant check for many types of facets... only do on field faceting + if (!parentMissing) { + // if parent bucket wasn't missing, check if this bucket was. + // this really only needs checking on certain buckets... (like terms facet) + boolean sawThisBucket = mcontext.getShardFlag(bucketNumber); + if (!sawThisBucket) { + mcontext.setBucketWasMissing(true); + } + } else { + // if parent bucket was missing, then we should be too + assert !mcontext.getShardFlag(bucketNumber); + } + + Map refinement = null; + + if (!mcontext.bucketWasMissing()) { + // this is just a pass-through bucket... see if there is anything to do at all + if (subs == null || refineTags.isEmpty()) { + return null; + } + } else { + // for missing bucket, go over all sub-facts + refineTags = null; + refinement = new HashMap<>(4); + if (bucketValue != null) { + refinement.put("_v", bucketValue); + } + refinement.put("_m",1); + } + + // TODO: listing things like sub-facets that have no field facets are redundant + // (we only need facet that have variable values) + + for (Map.Entry sub : subs.entrySet()) { + if (refineTags != null && !refineTags.contains(sub.getKey())) { + continue; + } + Map subRef = sub.getValue().getRefinement(mcontext); + if (subRef != null) { + if (refinement == null) { + refinement = new HashMap<>(4); + } + refinement.put(sub.getKey(), subRef); + } + } + + + // reset the "bucketMissing" flag on the way back out. + mcontext.setBucketWasMissing(parentMissing); + return refinement; + } + +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java index 8767e5b3bdb..f8d677a9a1d 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java @@ -473,169 +473,3 @@ class FacetQueryMerger extends FacetBucketMerger { -class FacetBucket { - final FacetBucketMerger parent; - final Comparable bucketValue; - final int bucketNumber; // this is just for internal correlation (the first bucket created is bucket 0, the next bucket 1, across all field buckets) - - long count; - Map subs; - - public FacetBucket(FacetBucketMerger parent, Comparable bucketValue, FacetMerger.Context mcontext) { - this.parent = parent; - this.bucketValue = bucketValue; - this.bucketNumber = mcontext.getNewBucketNumber(); // TODO: we don't need bucket numbers for all buckets... - } - - public long getCount() { - return count; - } - - /** returns the existing merger for the given key, or null if none yet exists */ - FacetMerger getExistingMerger(String key) { - if (subs == null) return null; - return subs.get(key); - } - - private FacetMerger getMerger(String key, Object prototype) { - FacetMerger merger = null; - if (subs != null) { - merger = subs.get(key); - if (merger != null) return merger; - } - - merger = parent.createFacetMerger(key, prototype); - - if (merger != null) { - if (subs == null) { - subs = new HashMap<>(); - } - subs.put(key, merger); - } - - return merger; - } - - public void mergeBucket(SimpleOrderedMap bucket, FacetMerger.Context mcontext) { - // todo: for refinements, we want to recurse, but not re-do stats for intermediate buckets - - mcontext.setShardFlag(bucketNumber); - - // drive merging off the received bucket? - for (int i=0; i mergerEntry : subs.entrySet()) { - FacetMerger subMerger = mergerEntry.getValue(); - out.add(mergerEntry.getKey(), subMerger.getMergedResult()); - } - } - - return out; - } - - public Map getRefinement(FacetMerger.Context mcontext, Collection refineTags) { - if (subs == null) { - return null; - } - Map refinement = null; - for (String tag : refineTags) { - FacetMerger subMerger = subs.get(tag); - if (subMerger != null) { - Map subRef = subMerger.getRefinement(mcontext); - if (subRef != null) { - if (refinement == null) { - refinement = new HashMap<>(refineTags.size()); - } - refinement.put(tag, subRef); - } - } - } - return refinement; - } - - public Map getRefinement2(FacetMerger.Context mcontext, Collection refineTags) { - // TODO - partial results should turn off refining!!! - - boolean parentMissing = mcontext.bucketWasMissing(); - - // TODO: this is a redundant check for many types of facets... only do on field faceting - if (!parentMissing) { - // if parent bucket wasn't missing, check if this bucket was. - // this really only needs checking on certain buckets... (like terms facet) - boolean sawThisBucket = mcontext.getShardFlag(bucketNumber); - if (!sawThisBucket) { - mcontext.setBucketWasMissing(true); - } - } else { - // if parent bucket was missing, then we should be too - assert !mcontext.getShardFlag(bucketNumber); - } - - Map refinement = null; - - if (!mcontext.bucketWasMissing()) { - // this is just a pass-through bucket... see if there is anything to do at all - if (subs == null || refineTags.isEmpty()) { - return null; - } - } else { - // for missing bucket, go over all sub-facts - refineTags = null; - refinement = new HashMap<>(4); - if (bucketValue != null) { - refinement.put("_v", bucketValue); - } - refinement.put("_m",1); - } - - // TODO: listing things like sub-facets that have no field facets are redundant - // (we only need facet that have variable values) - - for (Map.Entry sub : subs.entrySet()) { - if (refineTags != null && !refineTags.contains(sub.getKey())) { - continue; - } - Map subRef = sub.getValue().getRefinement(mcontext); - if (subRef != null) { - if (refinement == null) { - refinement = new HashMap<>(4); - } - refinement.put(sub.getKey(), subRef); - } - } - - - // reset the "bucketMissing" flag on the way back out. - mcontext.setBucketWasMissing(parentMissing); - return refinement; - } - -} - - diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java index a9810069670..f55fc0fc180 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java @@ -18,6 +18,7 @@ package org.apache.solr.search.facet; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; @@ -157,9 +158,21 @@ abstract class FacetRequestSortedMerger tagsWithPartial = mcontext.getSubsWithPartial(freq); - boolean thisMissing = mcontext.bucketWasMissing(); + boolean thisMissing = mcontext.bucketWasMissing(); // Was this whole facet missing (i.e. inside a bucket that was missing)? - int num = (int)(freq.offset + freq.limit); + // TODO: add information in sub-shard response about dropped buckets (i.e. not all returned due to limit) + // If we know we've seen all the buckets from a shard, then we don't have to add to leafBuckets or missingBuckets, only skipBuckets + boolean isCommandPartial = freq.returnsPartial(); + boolean returnedAllBuckets = !isCommandPartial && !thisMissing; // did the shard return all of the possible buckets? + + if (returnedAllBuckets && tags.isEmpty() && tagsWithPartial.isEmpty()) { + // this facet returned all possible buckets, and there were no sub-facets with partial results + // and sub-facets that require refining + return null; + } + + + int num = freq.limit < 0 ? Integer.MAX_VALUE : (int)(freq.offset + freq.limit); int numBucketsToCheck = Math.min(buckets.size(), num); Collection bucketList; @@ -176,8 +189,8 @@ abstract class FacetRequestSortedMerger leafBuckets = null; // "_l" missing buckets specified by bucket value only (no need to specify anything further) - ArrayList missingBuckets = null; // "_m" missing buckets that need to specify values for partial facets - ArrayList skipBuckets = null; // "_s" present buckets that we need to recurse into because children facets have refinement requirements + ArrayList missingBuckets = null; // "_m" missing buckets that need to specify values for partial facets.. each entry is [bucketval, subs] + ArrayList skipBuckets = null; // "_s" present buckets that we need to recurse into because children facets have refinement requirements. each entry is [bucketval, subs] for (FacetBucket bucket : bucketList) { if (numBucketsToCheck-- <= 0) break; @@ -196,7 +209,7 @@ abstract class FacetRequestSortedMerger(); - missingBuckets.add(bucketRefinement); + missingBuckets.add( Arrays.asList(bucket.bucketValue, bucketRefinement) ); } } @@ -211,7 +224,7 @@ abstract class FacetRequestSortedMerger bucketRefinement = bucket.getRefinement(mcontext, tagsWithPartial); if (bucketRefinement != null) { if (skipBuckets == null) skipBuckets = new ArrayList<>(); - skipBuckets.add(bucketRefinement); + skipBuckets.add( Arrays.asList(bucket.bucketValue, bucketRefinement) ); } } diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java new file mode 100644 index 00000000000..7c510eada69 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java @@ -0,0 +1,218 @@ +package org.apache.solr.search.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Map; + +import org.apache.solr.JSONTestUtil; +import org.apache.solr.SolrTestCaseHS; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.request.SolrQueryRequest; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.noggit.JSONParser; +import org.noggit.JSONUtil; +import org.noggit.ObjectBuilder; + + +public class TestJsonFacetRefinement extends SolrTestCaseHS { + + private static SolrInstances servers; // for distributed testing + private static int origTableSize; + + @BeforeClass + public static void beforeTests() throws Exception { + JSONTestUtil.failRepeatedKeys = true; + initCore("solrconfig-tlog.xml","schema_latest.xml"); + } + + public static void initServers() throws Exception { + if (servers == null) { + servers = new SolrInstances(3, "solrconfig-tlog.xml", "schema_latest.xml"); + } + } + + @AfterClass + public static void afterTests() throws Exception { + JSONTestUtil.failRepeatedKeys = false; + if (servers != null) { + servers.stop(); + servers = null; + } + } + + + // todo - pull up to test base class? + public void matchJSON(String json, double delta, String... tests) throws Exception { + for (String test : tests) { + if (test == null) { + assertNull(json); + continue; + } + if (test.length()==0) continue; + + String err = JSONTestUtil.match(json, test, delta); + + if (err != null) { + throw new RuntimeException("JSON failed validation. error=" + err + + "\n expected =" + test + + "\n got = " + json + ); + } + } + } + + + public void match(Object input, double delta, String... tests) throws Exception { + for (String test : tests) { + String err = null; + if (test == null) { + if (input != null) { + err = "expected null"; + } + } else if (input == null) { + err = "got null"; + } else { + err = JSONTestUtil.matchObj(input, test, delta); + } + + if (err != null) { + throw new RuntimeException("JSON failed validation. error=" + err + + "\n expected =" + test + + "\n got = " + input + ); + } + } + } + + + /** Use SimpleOrderedMap rather than Map to match responses from shards */ + public static Object fromJSON(String json) throws IOException { + JSONParser parser = new JSONParser(json); + ObjectBuilder ob = new ObjectBuilder(parser) { + @Override + public Object newObject() throws IOException { + return new SimpleOrderedMap(); + } + + @Override + public void addKeyVal(Object map, Object key, Object val) throws IOException { + ((SimpleOrderedMap)map).add(key.toString(), val); + } + }; + + return ob.getObject(); + } + + void doTestRefine(String facet, String... responsesAndTests) throws Exception { + SolrQueryRequest req = req(); + try { + int nShards = responsesAndTests.length / 2; + Object jsonFacet = ObjectBuilder.fromJSON(facet); + FacetParser parser = new FacetTopParser(req); + FacetRequest facetRequest = parser.parse(jsonFacet); + + FacetMerger merger = null; + FacetMerger.Context ctx = new FacetMerger.Context(nShards); + for (int i=0; i