SOLR-9432: json facet refinement progress, test refinement info going to shards

This commit is contained in:
yonik 2016-10-11 17:13:36 -04:00
parent 7660dde458
commit 0f08ad9ad3
4 changed files with 426 additions and 172 deletions

View File

@ -0,0 +1,189 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.facet;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.util.SimpleOrderedMap;
public class FacetBucket {
final FacetBucketMerger parent;
final Comparable bucketValue;
final int bucketNumber; // this is just for internal correlation (the first bucket created is bucket 0, the next bucket 1, across all field buckets)
long count;
Map<String, FacetMerger> subs;
public FacetBucket(FacetBucketMerger parent, Comparable bucketValue, FacetMerger.Context mcontext) {
this.parent = parent;
this.bucketValue = bucketValue;
this.bucketNumber = mcontext.getNewBucketNumber(); // TODO: we don't need bucket numbers for all buckets...
}
public long getCount() {
return count;
}
/** returns the existing merger for the given key, or null if none yet exists */
FacetMerger getExistingMerger(String key) {
if (subs == null) return null;
return subs.get(key);
}
private FacetMerger getMerger(String key, Object prototype) {
FacetMerger merger = null;
if (subs != null) {
merger = subs.get(key);
if (merger != null) return merger;
}
merger = parent.createFacetMerger(key, prototype);
if (merger != null) {
if (subs == null) {
subs = new HashMap<>();
}
subs.put(key, merger);
}
return merger;
}
public void mergeBucket(SimpleOrderedMap bucket, FacetMerger.Context mcontext) {
// todo: for refinements, we want to recurse, but not re-do stats for intermediate buckets
mcontext.setShardFlag(bucketNumber);
// drive merging off the received bucket?
for (int i=0; i<bucket.size(); i++) {
String key = bucket.getName(i);
Object val = bucket.getVal(i);
if ("count".equals(key)) {
count += ((Number)val).longValue();
continue;
}
if ("val".equals(key)) {
// this is taken care of at a higher level...
continue;
}
FacetMerger merger = getMerger(key, val);
if (merger != null) {
merger.merge( val , mcontext );
}
}
}
public SimpleOrderedMap getMergedBucket() {
SimpleOrderedMap out = new SimpleOrderedMap( (subs == null ? 0 : subs.size()) + 2 );
if (bucketValue != null) {
out.add("val", bucketValue);
}
out.add("count", count);
if (subs != null) {
for (Map.Entry<String,FacetMerger> mergerEntry : subs.entrySet()) {
FacetMerger subMerger = mergerEntry.getValue();
out.add(mergerEntry.getKey(), subMerger.getMergedResult());
}
}
return out;
}
public Map<String, Object> getRefinement(FacetMerger.Context mcontext, Collection<String> refineTags) {
if (subs == null) {
return null;
}
Map<String,Object> refinement = null;
for (String tag : refineTags) {
FacetMerger subMerger = subs.get(tag);
if (subMerger != null) {
Map<String,Object> subRef = subMerger.getRefinement(mcontext);
if (subRef != null) {
if (refinement == null) {
refinement = new HashMap<>(refineTags.size());
}
refinement.put(tag, subRef);
}
}
}
return refinement;
}
public Map<String, Object> getRefinement2(FacetMerger.Context mcontext, Collection<String> refineTags) {
// TODO - partial results should turn off refining!!!
boolean parentMissing = mcontext.bucketWasMissing();
// TODO: this is a redundant check for many types of facets... only do on field faceting
if (!parentMissing) {
// if parent bucket wasn't missing, check if this bucket was.
// this really only needs checking on certain buckets... (like terms facet)
boolean sawThisBucket = mcontext.getShardFlag(bucketNumber);
if (!sawThisBucket) {
mcontext.setBucketWasMissing(true);
}
} else {
// if parent bucket was missing, then we should be too
assert !mcontext.getShardFlag(bucketNumber);
}
Map<String,Object> refinement = null;
if (!mcontext.bucketWasMissing()) {
// this is just a pass-through bucket... see if there is anything to do at all
if (subs == null || refineTags.isEmpty()) {
return null;
}
} else {
// for missing bucket, go over all sub-facts
refineTags = null;
refinement = new HashMap<>(4);
if (bucketValue != null) {
refinement.put("_v", bucketValue);
}
refinement.put("_m",1);
}
// TODO: listing things like sub-facets that have no field facets are redundant
// (we only need facet that have variable values)
for (Map.Entry<String,FacetMerger> sub : subs.entrySet()) {
if (refineTags != null && !refineTags.contains(sub.getKey())) {
continue;
}
Map<String,Object> subRef = sub.getValue().getRefinement(mcontext);
if (subRef != null) {
if (refinement == null) {
refinement = new HashMap<>(4);
}
refinement.put(sub.getKey(), subRef);
}
}
// reset the "bucketMissing" flag on the way back out.
mcontext.setBucketWasMissing(parentMissing);
return refinement;
}
}

View File

@ -473,169 +473,3 @@ class FacetQueryMerger extends FacetBucketMerger<FacetQuery> {
class FacetBucket {
final FacetBucketMerger parent;
final Comparable bucketValue;
final int bucketNumber; // this is just for internal correlation (the first bucket created is bucket 0, the next bucket 1, across all field buckets)
long count;
Map<String, FacetMerger> subs;
public FacetBucket(FacetBucketMerger parent, Comparable bucketValue, FacetMerger.Context mcontext) {
this.parent = parent;
this.bucketValue = bucketValue;
this.bucketNumber = mcontext.getNewBucketNumber(); // TODO: we don't need bucket numbers for all buckets...
}
public long getCount() {
return count;
}
/** returns the existing merger for the given key, or null if none yet exists */
FacetMerger getExistingMerger(String key) {
if (subs == null) return null;
return subs.get(key);
}
private FacetMerger getMerger(String key, Object prototype) {
FacetMerger merger = null;
if (subs != null) {
merger = subs.get(key);
if (merger != null) return merger;
}
merger = parent.createFacetMerger(key, prototype);
if (merger != null) {
if (subs == null) {
subs = new HashMap<>();
}
subs.put(key, merger);
}
return merger;
}
public void mergeBucket(SimpleOrderedMap bucket, FacetMerger.Context mcontext) {
// todo: for refinements, we want to recurse, but not re-do stats for intermediate buckets
mcontext.setShardFlag(bucketNumber);
// drive merging off the received bucket?
for (int i=0; i<bucket.size(); i++) {
String key = bucket.getName(i);
Object val = bucket.getVal(i);
if ("count".equals(key)) {
count += ((Number)val).longValue();
continue;
}
if ("val".equals(key)) {
// this is taken care of at a higher level...
continue;
}
FacetMerger merger = getMerger(key, val);
if (merger != null) {
merger.merge( val , mcontext );
}
}
}
public SimpleOrderedMap getMergedBucket() {
SimpleOrderedMap out = new SimpleOrderedMap( (subs == null ? 0 : subs.size()) + 2 );
if (bucketValue != null) {
out.add("val", bucketValue);
}
out.add("count", count);
if (subs != null) {
for (Map.Entry<String,FacetMerger> mergerEntry : subs.entrySet()) {
FacetMerger subMerger = mergerEntry.getValue();
out.add(mergerEntry.getKey(), subMerger.getMergedResult());
}
}
return out;
}
public Map<String, Object> getRefinement(FacetMerger.Context mcontext, Collection<String> refineTags) {
if (subs == null) {
return null;
}
Map<String,Object> refinement = null;
for (String tag : refineTags) {
FacetMerger subMerger = subs.get(tag);
if (subMerger != null) {
Map<String,Object> subRef = subMerger.getRefinement(mcontext);
if (subRef != null) {
if (refinement == null) {
refinement = new HashMap<>(refineTags.size());
}
refinement.put(tag, subRef);
}
}
}
return refinement;
}
public Map<String, Object> getRefinement2(FacetMerger.Context mcontext, Collection<String> refineTags) {
// TODO - partial results should turn off refining!!!
boolean parentMissing = mcontext.bucketWasMissing();
// TODO: this is a redundant check for many types of facets... only do on field faceting
if (!parentMissing) {
// if parent bucket wasn't missing, check if this bucket was.
// this really only needs checking on certain buckets... (like terms facet)
boolean sawThisBucket = mcontext.getShardFlag(bucketNumber);
if (!sawThisBucket) {
mcontext.setBucketWasMissing(true);
}
} else {
// if parent bucket was missing, then we should be too
assert !mcontext.getShardFlag(bucketNumber);
}
Map<String,Object> refinement = null;
if (!mcontext.bucketWasMissing()) {
// this is just a pass-through bucket... see if there is anything to do at all
if (subs == null || refineTags.isEmpty()) {
return null;
}
} else {
// for missing bucket, go over all sub-facts
refineTags = null;
refinement = new HashMap<>(4);
if (bucketValue != null) {
refinement.put("_v", bucketValue);
}
refinement.put("_m",1);
}
// TODO: listing things like sub-facets that have no field facets are redundant
// (we only need facet that have variable values)
for (Map.Entry<String,FacetMerger> sub : subs.entrySet()) {
if (refineTags != null && !refineTags.contains(sub.getKey())) {
continue;
}
Map<String,Object> subRef = sub.getValue().getRefinement(mcontext);
if (subRef != null) {
if (refinement == null) {
refinement = new HashMap<>(4);
}
refinement.put(sub.getKey(), subRef);
}
}
// reset the "bucketMissing" flag on the way back out.
mcontext.setBucketWasMissing(parentMissing);
return refinement;
}
}

View File

@ -18,6 +18,7 @@
package org.apache.solr.search.facet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
@ -157,9 +158,21 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
// If we are missing a bucket for this shard, we'll need to get the specific buckets that need refining.
Collection<String> tagsWithPartial = mcontext.getSubsWithPartial(freq);
boolean thisMissing = mcontext.bucketWasMissing();
boolean thisMissing = mcontext.bucketWasMissing(); // Was this whole facet missing (i.e. inside a bucket that was missing)?
int num = (int)(freq.offset + freq.limit);
// TODO: add information in sub-shard response about dropped buckets (i.e. not all returned due to limit)
// If we know we've seen all the buckets from a shard, then we don't have to add to leafBuckets or missingBuckets, only skipBuckets
boolean isCommandPartial = freq.returnsPartial();
boolean returnedAllBuckets = !isCommandPartial && !thisMissing; // did the shard return all of the possible buckets?
if (returnedAllBuckets && tags.isEmpty() && tagsWithPartial.isEmpty()) {
// this facet returned all possible buckets, and there were no sub-facets with partial results
// and sub-facets that require refining
return null;
}
int num = freq.limit < 0 ? Integer.MAX_VALUE : (int)(freq.offset + freq.limit);
int numBucketsToCheck = Math.min(buckets.size(), num);
Collection<FacetBucket> bucketList;
@ -176,8 +189,8 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
}
ArrayList<Object> leafBuckets = null; // "_l" missing buckets specified by bucket value only (no need to specify anything further)
ArrayList<Object> missingBuckets = null; // "_m" missing buckets that need to specify values for partial facets
ArrayList<Object> skipBuckets = null; // "_s" present buckets that we need to recurse into because children facets have refinement requirements
ArrayList<Object> missingBuckets = null; // "_m" missing buckets that need to specify values for partial facets.. each entry is [bucketval, subs]
ArrayList<Object> skipBuckets = null; // "_s" present buckets that we need to recurse into because children facets have refinement requirements. each entry is [bucketval, subs]
for (FacetBucket bucket : bucketList) {
if (numBucketsToCheck-- <= 0) break;
@ -196,7 +209,7 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
if (bucketRefinement != null) {
if (missingBuckets==null) missingBuckets = new ArrayList<>();
missingBuckets.add(bucketRefinement);
missingBuckets.add( Arrays.asList(bucket.bucketValue, bucketRefinement) );
}
}
@ -211,7 +224,7 @@ abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted
Map<String,Object> bucketRefinement = bucket.getRefinement(mcontext, tagsWithPartial);
if (bucketRefinement != null) {
if (skipBuckets == null) skipBuckets = new ArrayList<>();
skipBuckets.add(bucketRefinement);
skipBuckets.add( Arrays.asList(bucket.bucketValue, bucketRefinement) );
}
}

View File

@ -0,0 +1,218 @@
package org.apache.solr.search.facet;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import org.apache.solr.JSONTestUtil;
import org.apache.solr.SolrTestCaseHS;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.noggit.JSONParser;
import org.noggit.JSONUtil;
import org.noggit.ObjectBuilder;
public class TestJsonFacetRefinement extends SolrTestCaseHS {
private static SolrInstances servers; // for distributed testing
private static int origTableSize;
@BeforeClass
public static void beforeTests() throws Exception {
JSONTestUtil.failRepeatedKeys = true;
initCore("solrconfig-tlog.xml","schema_latest.xml");
}
public static void initServers() throws Exception {
if (servers == null) {
servers = new SolrInstances(3, "solrconfig-tlog.xml", "schema_latest.xml");
}
}
@AfterClass
public static void afterTests() throws Exception {
JSONTestUtil.failRepeatedKeys = false;
if (servers != null) {
servers.stop();
servers = null;
}
}
// todo - pull up to test base class?
public void matchJSON(String json, double delta, String... tests) throws Exception {
for (String test : tests) {
if (test == null) {
assertNull(json);
continue;
}
if (test.length()==0) continue;
String err = JSONTestUtil.match(json, test, delta);
if (err != null) {
throw new RuntimeException("JSON failed validation. error=" + err +
"\n expected =" + test +
"\n got = " + json
);
}
}
}
public void match(Object input, double delta, String... tests) throws Exception {
for (String test : tests) {
String err = null;
if (test == null) {
if (input != null) {
err = "expected null";
}
} else if (input == null) {
err = "got null";
} else {
err = JSONTestUtil.matchObj(input, test, delta);
}
if (err != null) {
throw new RuntimeException("JSON failed validation. error=" + err +
"\n expected =" + test +
"\n got = " + input
);
}
}
}
/** Use SimpleOrderedMap rather than Map to match responses from shards */
public static Object fromJSON(String json) throws IOException {
JSONParser parser = new JSONParser(json);
ObjectBuilder ob = new ObjectBuilder(parser) {
@Override
public Object newObject() throws IOException {
return new SimpleOrderedMap();
}
@Override
public void addKeyVal(Object map, Object key, Object val) throws IOException {
((SimpleOrderedMap)map).add(key.toString(), val);
}
};
return ob.getObject();
}
void doTestRefine(String facet, String... responsesAndTests) throws Exception {
SolrQueryRequest req = req();
try {
int nShards = responsesAndTests.length / 2;
Object jsonFacet = ObjectBuilder.fromJSON(facet);
FacetParser parser = new FacetTopParser(req);
FacetRequest facetRequest = parser.parse(jsonFacet);
FacetMerger merger = null;
FacetMerger.Context ctx = new FacetMerger.Context(nShards);
for (int i=0; i<nShards; i++) {
Object response = fromJSON(responsesAndTests[i]);
if (i==0) {
merger = facetRequest.createFacetMerger(response);
}
ctx.newShard("s"+i);
merger.merge(response, ctx);
}
for (int i=0; i<nShards; i++) {
ctx.setShard("s"+i);
Object refinement = merger.getRefinement(ctx);
String tests = responsesAndTests[nShards+i];
match(refinement, 1e-5, tests);
}
} finally {
req.close();
}
}
@Test
public void testMerge() throws Exception {
doTestRefine("{x : {type:terms, field:X, limit:2, refine:true} }", // the facet request
"{x: {buckets:[{val:x1, count:5}, {val:x2, count:3}] } }", // shard0 response
"{x: {buckets:[{val:x2, count:4}, {val:x3, count:2}] } }", // shard1 response
null, // shard0 expected refinement info
"=={x:{_l:[x1]}}" // shard1 expected refinement info
);
// same test w/o refinement turned on
doTestRefine("{x : {type:terms, field:X, limit:2} }", // the facet request
"{x: {buckets:[{val:x1, count:5}, {val:x2, count:3}] } }", // shard0 response
"{x: {buckets:[{val:x2, count:4}, {val:x3, count:2}] } }", // shard1 response
null, // shard0 expected refinement info
null // shard1 expected refinement info
);
// same test, but nested in query facet
doTestRefine("{top:{type:query, q:'foo_s:myquery', facet:{x : {type:terms, field:X, limit:2, refine:true} } } }", // the facet request
"{top: {x: {buckets:[{val:x1, count:5}, {val:x2, count:3}] } } }", // shard0 response
"{top: {x: {buckets:[{val:x2, count:4}, {val:x3, count:2}] } } }", // shard1 response
null, // shard0 expected refinement info
"=={top:{x:{_l:[x1]}}}" // shard1 expected refinement info
);
// same test w/o refinement turned on
doTestRefine("{top:{type:query, q:'foo_s:myquery', facet:{x : {type:terms, field:X, limit:2, refine:false} } } }",
"{top: {x: {buckets:[{val:x1, count:5}, {val:x2, count:3}] } } }", // shard0 response
"{top: {x: {buckets:[{val:x2, count:4}, {val:x3, count:2}] } } }", // shard1 response
null,
null
);
// same test, but nested in a terms facet
doTestRefine("{top:{type:terms, field:Afield, facet:{x : {type:terms, field:X, limit:2, refine:true} } } }",
"{top: {buckets:[{val:'A', count:2, x:{buckets:[{val:x1, count:5},{val:x2, count:3}]} } ] } }",
"{top: {buckets:[{val:'A', count:1, x:{buckets:[{val:x2, count:4},{val:x3, count:2}]} } ] } }",
null,
"=={top: {" +
"_s:[ ['A' , {x:{_l:[x1]}} ] ]" +
" } " +
"}"
);
// same test, but nested in range facet
doTestRefine("{top:{type:range, field:R, start:0, end:1, gap:1, facet:{x : {type:terms, field:X, limit:2, refine:true} } } }",
"{top: {buckets:[{val:0, count:2, x:{buckets:[{val:x1, count:5},{val:x2, count:3}]} } ] } }",
"{top: {buckets:[{val:0, count:1, x:{buckets:[{val:x2, count:4},{val:x3, count:2}]} } ] } }",
null,
"=={top: {" +
"_s:[ [0 , {x:{_l:[x1]}} ] ]" +
" } " +
"}"
);
}
}