SOLR-7452: json facet API, refine/skip through buckets already visited

This commit is contained in:
yonik 2017-03-21 08:42:33 -04:00
parent 4171ef79b4
commit 6786089b0b
7 changed files with 62 additions and 31 deletions

View File

@ -19,6 +19,7 @@ package org.apache.solr.search.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
@ -311,7 +312,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
if (freq.missing) {
// TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.
SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null, false);
fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null, false, null);
res.add("missing", missingBucket);
}
@ -379,7 +380,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
}
}
processSubs(target, filter, subDomain, false);
processSubs(target, filter, subDomain, false, null);
}
@Override
@ -513,31 +514,43 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
}
/*
"qfacet":{"cat2":{"_l":["A"]}},
"all":{"_s":[[
"all",
{"cat3":{"_l":["A"]}}]]},
"cat1":{"_l":["A"]}}}
*/
/**
 * Casts an untyped refinement-info value to a typed {@link List}, or returns an
 * immutable empty list when the value is {@code null}.
 *
 * <p>The cast is unchecked: callers must only pass values that are actually
 * lists (here the values come from parsed facet-refinement info, e.g. the
 * "_l"/"_s"/"_m" entries). Using {@link Collections#emptyList()} instead of the
 * raw {@code Collections.EMPTY_LIST} keeps the return properly typed.
 *
 * @param list a {@code List} instance or {@code null}
 * @return the same list, typed, or an immutable empty list if {@code list} is null
 */
@SuppressWarnings("unchecked")
static <T> List<T> asList(Object list) {
  return list != null ? (List<T>) list : Collections.emptyList();
}
protected SimpleOrderedMap<Object> refineFacets() throws IOException {
List leaves = (List)fcontext.facetInfo.get("_l");
List leaves = asList(fcontext.facetInfo.get("_l"));
List<List> skip = asList(fcontext.facetInfo.get("_s"));
List<List> missing = asList(fcontext.facetInfo.get("_m"));
// For leaf refinements, we do full faceting for each leaf bucket. Any sub-facets of these buckets will be fully evaluated. Because of this, we should never
// encounter leaf refinements that have sub-facets that return partial results.
SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
List<SimpleOrderedMap> bucketList = new ArrayList<>(leaves.size());
List<SimpleOrderedMap> bucketList = new ArrayList<>( leaves.size() + skip.size() + missing.size() );
res.add("buckets", bucketList);
// TODO: an alternate implementations can fill all accs at once
createAccs(-1, 1);
FieldType ft = sf.getType();
for (Object bucketVal : leaves) {
SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
bucketList.add(bucket);
bucket.add("val", bucketVal);
bucketList.add( refineBucket(bucketVal, false, null) );
}
for (List bucketAndFacetInfo : skip) {
assert bucketAndFacetInfo.size() == 2;
Object bucketVal = bucketAndFacetInfo.get(0);
Map<String,Object> facetInfo = (Map<String, Object>) bucketAndFacetInfo.get(1);
// String internal = ft.toInternal( tobj.toString() ); // TODO - we need a better way to get from object to query...
Query domainQ = ft.getFieldQuery(null, sf, bucketVal.toString());
fillBucket(bucket, domainQ, null, false);
bucketList.add( refineBucket(bucketVal, true, facetInfo ) );
}
// If there are just a couple of leaves, and if the domain is large, then
@ -548,4 +561,17 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
return res;
}
/**
 * Builds the refined response bucket for a single bucket value.
 *
 * <p>Constructs a domain query matching {@code bucketVal} and delegates to
 * {@code fillBucket}. When {@code skip} is true, stat collection for this
 * bucket is skipped and only the sub-facets named in {@code facetInfo} are
 * descended into (see fillBucket/processSubs).
 *
 * @param bucketVal the facet bucket value to refine
 * @param skip      true when this bucket was already fully visited and only
 *                  deeper refinement is needed
 * @param facetInfo refinement info for sub-facets of this bucket; may be null
 * @return the populated bucket, including its "val" entry
 */
private SimpleOrderedMap<Object> refineBucket(Object bucketVal, boolean skip, Map<String,Object> facetInfo) throws IOException {
  SimpleOrderedMap<Object> refined = new SimpleOrderedMap<>();
  refined.add("val", bucketVal);
  // TODO: need a better way to get from the bucket value back to a domain
  // query than round-tripping through its string form (e.g. ft.toInternal).
  FieldType fieldType = sf.getType();
  Query bucketQuery = fieldType.getFieldQuery(null, sf, bucketVal.toString());
  fillBucket(refined, bucketQuery, null, skip, facetInfo);
  return refined;
}
}

View File

@ -333,7 +333,7 @@ class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor implement
bucket.add("val", bucketVal);
addStats(bucket, 0);
if (hasSubFacets) {
processSubs(bucket, bucketQuery, termSet, false);
processSubs(bucket, bucketQuery, termSet, false, null);
}
// TODO... termSet needs to stick around for streaming sub-facets?

View File

@ -235,6 +235,7 @@ public class FacetModule extends SearchComponent {
Map<String,Object> finfo = new HashMap<>(1);
finfo.put(FACET_REFINE, refinement);
String finfoStr = JSONUtil.toJSON(finfo);
// System.err.println("##################### REFINE=" + finfoStr);
shardsRefineRequest.params.add(FACET_INFO, finfoStr);
if (newRequest) {

View File

@ -367,7 +367,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
}
// TODO: rather than just have a raw "response", perhaps we should model as a bucket object that contains the response plus extra info?
void fillBucket(SimpleOrderedMap<Object> bucket, Query q, DocSet result, boolean skip) throws IOException {
void fillBucket(SimpleOrderedMap<Object> bucket, Query q, DocSet result, boolean skip, Map<String,Object> facetInfo) throws IOException {
// TODO: we don't need the DocSet if we've already calculated everything during the first phase
boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;
@ -398,7 +398,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
if (!skip) {
processStats(bucket, result, count);
}
processSubs(bucket, q, result, skip);
processSubs(bucket, q, result, skip, facetInfo);
} finally {
if (result != null) {
// result.decref(); // OFF-HEAP
@ -407,7 +407,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
}
}
void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain, boolean skip) throws IOException {
void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain, boolean skip, Map<String,Object> facetInfo) throws IOException {
boolean emptyDomain = domain == null || domain.size() == 0;
@ -423,8 +423,8 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
}
Map<String,Object>facetInfoSub = null;
if (fcontext.facetInfo != null) {
facetInfoSub = (Map<String,Object>)fcontext.facetInfo.get(sub.getKey());
if (facetInfo != null) {
facetInfoSub = (Map<String,Object>)facetInfo.get(sub.getKey());
}
// If we're skipping this node, then we only need to process sub-facets that have facet info specified.

View File

@ -61,7 +61,7 @@ class FacetQueryProcessor extends FacetProcessor<FacetQuery> {
// FIXME - what needs to be done here?
}
response = new SimpleOrderedMap<>();
fillBucket(response, freq.q, null, (fcontext.flags & FacetContext.SKIP_FACET)!=0);
fillBucket(response, freq.q, null, (fcontext.flags & FacetContext.SKIP_FACET)!=0, fcontext.facetInfo);
}

View File

@ -350,7 +350,7 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
if (freq.getSubFacets().size() > 0) {
DocSet subBase = intersections[slot];
try {
processSubs(bucket, filters[slot], subBase, false);
processSubs(bucket, filters[slot], subBase, false, null);
} finally {
// subContext.base.decref(); // OFF-HEAP
// subContext.base = null; // do not modify context after creation... there may be deferred execution (i.e. streaming)
@ -367,7 +367,7 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
}
Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
fillBucket(bucket, rangeQ, null, false);
fillBucket(bucket, rangeQ, null, false, null);
return bucket;
}

View File

@ -227,16 +227,16 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
String cat_s = p.get("cat_s");
String num_d = p.get("num_d");
clients.get(0).add( sdoc("id", "01", cat_s, "A", num_d, -1) ); // A wins count tie
clients.get(0).add( sdoc("id", "02", cat_s, "B", num_d, 3) );
clients.get(0).add( sdoc("id", "01", "all_s","all", cat_s, "A", num_d, -1) ); // A wins count tie
clients.get(0).add( sdoc("id", "02", "all_s","all", cat_s, "B", num_d, 3) );
clients.get(1).add( sdoc("id", "11", cat_s, "B", num_d, -5) ); // B highest count
clients.get(1).add( sdoc("id", "12", cat_s, "B", num_d, -11) );
clients.get(1).add( sdoc("id", "13", cat_s, "A", num_d, 7) );
clients.get(1).add( sdoc("id", "11", "all_s","all", cat_s, "B", num_d, -5) ); // B highest count
clients.get(1).add( sdoc("id", "12", "all_s","all", cat_s, "B", num_d, -11) );
clients.get(1).add( sdoc("id", "13", "all_s","all", cat_s, "A", num_d, 7) );
clients.get(2).add( sdoc("id", "21", cat_s, "A", num_d, 17) ); // A highest count
clients.get(2).add( sdoc("id", "22", cat_s, "A", num_d, -19) );
clients.get(2).add( sdoc("id", "23", cat_s, "B", num_d, 11) );
clients.get(2).add( sdoc("id", "21", "all_s","all", cat_s, "A", num_d, 17) ); // A highest count
clients.get(2).add( sdoc("id", "22", "all_s","all", cat_s, "A", num_d, -19) );
clients.get(2).add( sdoc("id", "23", "all_s","all", cat_s, "B", num_d, 11) );
client.commit();
@ -291,12 +291,16 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
"json.facet", "{" +
" cat0:{type:terms, field:${cat_s}, sort:'min1 asc', limit:1, overrequest:0, refine:false, facet:{ min1:'min(${num_d})'} }" +
",cat1:{type:terms, field:${cat_s}, sort:'min1 asc', limit:1, overrequest:0, refine:true, facet:{ min1:'min(${num_d})'} }" +
",qfacet:{type:query, q:'*:*', facet:{ cat2:{type:terms, field:${cat_s}, sort:'min1 asc', limit:1, overrequest:0, refine:true, facet:{ min1:'min(${num_d})'} } }}" + // refinement needed through a query facet
",allf:{type:terms, field:all_s, facet:{ cat3:{type:terms, field:${cat_s}, sort:'min1 asc', limit:1, overrequest:0, refine:true, facet:{ min1:'min(${num_d})'} } }}" + // refinement needed through field facet
",sum1:'sum(num_d)'" + // make sure that root bucket stats aren't affected by refinement
"}"
)
, "facets=={ count:8" +
", cat0:{ buckets:[ {val:A,count:3, min1:-19.0} ] }" + // B wins in shard2, so we're missing the "A" count for that shar w/o refinement.
", cat0:{ buckets:[ {val:A,count:3, min1:-19.0} ] }" + // B wins in shard2, so we're missing the "A" count for that shard w/o refinement.
", cat1:{ buckets:[ {val:A,count:4, min1:-19.0} ] }" + // with refinement, we get the right count
", qfacet:{ count:8, cat2:{ buckets:[ {val:A,count:4, min1:-19.0} ] } }" + // just like the previous response, just nested under a query facet
", allf:{ buckets:[ {cat3:{ buckets:[ {val:A,count:4, min1:-19.0} ] } ,count:8,val:all }] }" + // just like the previous response, just nested under a field facet
", sum1:2.0" +
"}"
);