mirror of https://github.com/apache/lucene.git

SOLR-7452: json facet API, refine/skip through buckets already visited
commit 6786089b0b (parent 4171ef79b4)
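Context for the diff that follows (editorial note, not part of the commit): during distributed json.facet refinement, the coordinator sends each shard a nested facet-info map keyed by facet name. Judging from the code below, "_l" lists bucket values that still need full refinement, "_s" lists [bucketValue, nestedFacetInfo] pairs for buckets that were already visited and only need specific sub-facets refined, and "_m" appears reserved for missing buckets. A minimal plain-Java sketch of that shape, mirroring the comment block added to FacetFieldProcessor and the facet names used in TestJsonFacetRefinement at the end of this diff (the class name RefineInfoSketch is illustrative only):

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;

// Illustrative only: builds a refinement-info map shaped like the one described in
// the FacetFieldProcessor comment block below ("_l" = leaf buckets to refine fully,
// "_s" = [bucketVal, nestedFacetInfo] pairs to skip through, "_m" = missing buckets).
public class RefineInfoSketch {
  public static void main(String[] args) {
    // cat1:{_l:["A"]} -- refine the "A" bucket of the cat1 facet from scratch
    Map<String,Object> cat1 = new LinkedHashMap<>();
    cat1.put("_l", Arrays.asList("A"));

    // allf:{_s:[["all", {cat3:{_l:["A"]}}]]} -- the "all" bucket was already visited,
    // so only its cat3 sub-facet needs refinement, and only for value "A"
    Map<String,Object> cat3 = new LinkedHashMap<>();
    cat3.put("_l", Arrays.asList("A"));
    Map<String,Object> allBucketSubs = new LinkedHashMap<>();
    allBucketSubs.put("cat3", cat3);
    Map<String,Object> allf = new LinkedHashMap<>();
    allf.put("_s", Arrays.asList(Arrays.asList("all", allBucketSubs)));

    Map<String,Object> facetInfo = new LinkedHashMap<>();
    facetInfo.put("cat1", cat1);
    facetInfo.put("allf", allf);

    // FacetModule serializes a map like this with JSONUtil.toJSON and passes it to
    // shards via the FACET_INFO request parameter (see the FacetModule hunk below).
    System.out.println(facetInfo);
  }
}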
@@ -19,6 +19,7 @@ package org.apache.solr.search.facet;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
@@ -311,7 +312,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
     if (freq.missing) {
       // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.
       SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
-      fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null, false);
+      fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null, false, null);
       res.add("missing", missingBucket);
     }
@@ -379,7 +380,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
       }
     }
 
-    processSubs(target, filter, subDomain, false);
+    processSubs(target, filter, subDomain, false, null);
   }
 
   @Override
@@ -513,31 +514,43 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
   }
 
 
+  /*
+   "qfacet":{"cat2":{"_l":["A"]}},
+   "all":{"_s":[[
+     "all",
+     {"cat3":{"_l":["A"]}}]]},
+   "cat1":{"_l":["A"]}}}
+
+  */
+
+  static <T> List<T> asList(Object list) {
+    return list != null ? (List<T>)list : Collections.EMPTY_LIST;
+  }
+
   protected SimpleOrderedMap<Object> refineFacets() throws IOException {
-    List leaves = (List)fcontext.facetInfo.get("_l");
+    List leaves = asList(fcontext.facetInfo.get("_l"));
+    List<List> skip = asList(fcontext.facetInfo.get("_s"));
+    List<List> missing = asList(fcontext.facetInfo.get("_m"));
 
     // For leaf refinements, we do full faceting for each leaf bucket. Any sub-facets of these buckets will be fully evaluated. Because of this, we should never
     // encounter leaf refinements that have sub-facets that return partial results.
 
     SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
-    List<SimpleOrderedMap> bucketList = new ArrayList<>(leaves.size());
+    List<SimpleOrderedMap> bucketList = new ArrayList<>( leaves.size() + skip.size() + missing.size() );
     res.add("buckets", bucketList);
 
     // TODO: an alternate implementations can fill all accs at once
     createAccs(-1, 1);
 
-    FieldType ft = sf.getType();
     for (Object bucketVal : leaves) {
-      SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
-      bucketList.add(bucket);
-      bucket.add("val", bucketVal);
+      bucketList.add( refineBucket(bucketVal, false, null) );
+    }
+    for (List bucketAndFacetInfo : skip) {
+      assert bucketAndFacetInfo.size() == 2;
+      Object bucketVal = bucketAndFacetInfo.get(0);
+      Map<String,Object> facetInfo = (Map<String, Object>) bucketAndFacetInfo.get(1);
 
-      // String internal = ft.toInternal( tobj.toString() );  // TODO - we need a better way to get from object to query...
-
-      Query domainQ = ft.getFieldQuery(null, sf, bucketVal.toString());
-
-      fillBucket(bucket, domainQ, null, false);
+      bucketList.add( refineBucket(bucketVal, true, facetInfo ) );
     }
 
     // If there are just a couple of leaves, and if the domain is large, then
@@ -548,4 +561,17 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
     return res;
   }
 
+  private SimpleOrderedMap<Object> refineBucket(Object bucketVal, boolean skip, Map<String,Object> facetInfo) throws IOException {
+    SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
+    FieldType ft = sf.getType();
+    bucket.add("val", bucketVal);
+    // String internal = ft.toInternal( tobj.toString() );  // TODO - we need a better way to get from object to query...
+
+    Query domainQ = ft.getFieldQuery(null, sf, bucketVal.toString());
+
+    fillBucket(bucket, domainQ, null, skip, facetInfo);
+
+    return bucket;
+  }
+
 }
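A note on the refactor above (editorial): asList tolerates facet-info maps that carry no "_l" entry (the old direct cast would hit a NullPointerException at leaves.size()), and the per-bucket work moved into refineBucket, which is called with skip=false for leaf entries and skip=true plus the nested facet info for already-visited entries, so skipped buckets are not re-counted. A standalone sketch of that dispatch using plain collections instead of Solr's types (class and method names here are illustrative, not Solr APIs):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;

// Models the dispatch in the new refineFacets(): "_l" buckets are refined fully,
// while "_s" buckets are skipped (no stats recomputed) and only the nested
// facet info they carry is descended into.
public class RefineDispatchSketch {

  // Stand-in for the new asList() helper: tolerate absent keys instead of failing.
  @SuppressWarnings("unchecked")
  static <T> List<T> asList(Object list) {
    return list != null ? (List<T>) list : Collections.emptyList();
  }

  static List<String> plan(Map<String,Object> facetInfo) {
    List<String> actions = new ArrayList<>();
    for (Object bucketVal : asList(facetInfo.get("_l"))) {
      actions.add("refine bucket " + bucketVal + " fully (stats + all sub-facets)");
    }
    for (Object entry : asList(facetInfo.get("_s"))) {
      List<?> bucketAndFacetInfo = (List<?>) entry;  // [bucketVal, nestedFacetInfo]
      Object bucketVal = bucketAndFacetInfo.get(0);
      Object nestedInfo = bucketAndFacetInfo.get(1);
      actions.add("skip bucket " + bucketVal + ", refine only sub-facets in " + nestedInfo);
    }
    return actions;
  }

  public static void main(String[] args) {
    Map<String,Object> facetInfo = Map.of(
        "_l", Arrays.asList("A"),
        "_s", Arrays.asList(Arrays.asList("all", Map.of("cat3", Map.of("_l", Arrays.asList("A"))))));
    plan(facetInfo).forEach(System.out::println);
  }
}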
@@ -333,7 +333,7 @@ class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor implement
       bucket.add("val", bucketVal);
       addStats(bucket, 0);
       if (hasSubFacets) {
-        processSubs(bucket, bucketQuery, termSet, false);
+        processSubs(bucket, bucketQuery, termSet, false, null);
       }
 
       // TODO... termSet needs to stick around for streaming sub-facets?
@@ -235,6 +235,7 @@ public class FacetModule extends SearchComponent {
     Map<String,Object> finfo = new HashMap<>(1);
     finfo.put(FACET_REFINE, refinement);
     String finfoStr = JSONUtil.toJSON(finfo);
+    // System.err.println("##################### REFINE=" + finfoStr);
     shardsRefineRequest.params.add(FACET_INFO, finfoStr);
 
     if (newRequest) {
@@ -367,7 +367,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
   }
 
   // TODO: rather than just have a raw "response", perhaps we should model as a bucket object that contains the response plus extra info?
-  void fillBucket(SimpleOrderedMap<Object> bucket, Query q, DocSet result, boolean skip) throws IOException {
+  void fillBucket(SimpleOrderedMap<Object> bucket, Query q, DocSet result, boolean skip, Map<String,Object> facetInfo) throws IOException {
 
     // TODO: we don't need the DocSet if we've already calculated everything during the first phase
     boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;
@@ -398,7 +398,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
       if (!skip) {
         processStats(bucket, result, count);
       }
-      processSubs(bucket, q, result, skip);
+      processSubs(bucket, q, result, skip, facetInfo);
     } finally {
       if (result != null) {
         // result.decref(); // OFF-HEAP
@@ -407,7 +407,7 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
     }
   }
 
-  void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain, boolean skip) throws IOException {
+  void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain, boolean skip, Map<String,Object> facetInfo) throws IOException {
 
     boolean emptyDomain = domain == null || domain.size() == 0;
 
@@ -423,8 +423,8 @@ public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
       }
 
       Map<String,Object>facetInfoSub = null;
-      if (fcontext.facetInfo != null) {
-        facetInfoSub = (Map<String,Object>)fcontext.facetInfo.get(sub.getKey());
+      if (facetInfo != null) {
+        facetInfoSub = (Map<String,Object>)facetInfo.get(sub.getKey());
       }
 
       // If we're skipping this node, then we only need to process sub-facets that have facet info specified.
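Editorial note on the processSubs change above: the method now receives the caller's facetInfo rather than always reading fcontext.facetInfo, so refinement info is looked up relative to the bucket currently being processed, and, per the existing comment at the end of the hunk, a skipped bucket only descends into sub-facets that actually appear in that map. A small standalone model of that decision (plain Java; shouldProcessSub is a hypothetical name, not a Solr method):

import java.util.List;
import java.util.Map;

// Models the decision made in FacetProcessor.processSubs(): when a bucket is being
// "skipped" (its stats were already collected in an earlier phase), a sub-facet is
// only worth processing if the refinement info mentions it by key.
public class SkipSubFacetSketch {

  @SuppressWarnings("unchecked")
  static boolean shouldProcessSub(String subKey, boolean skip, Map<String,Object> facetInfo) {
    Map<String,Object> facetInfoSub =
        facetInfo == null ? null : (Map<String,Object>) facetInfo.get(subKey);
    // If we're skipping this node, we only need to process sub-facets that have
    // facet info specified (mirrors the comment in the hunk above).
    return !skip || facetInfoSub != null;
  }

  public static void main(String[] args) {
    Map<String,Object> facetInfo = Map.of("cat3", Map.of("_l", List.of("A")));
    System.out.println(shouldProcessSub("cat3", true, facetInfo));   // true: refinement requested for cat3
    System.out.println(shouldProcessSub("other", true, facetInfo));  // false: nothing to refine here
    System.out.println(shouldProcessSub("other", false, facetInfo)); // true: normal, non-skip processing
  }
}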
@@ -61,7 +61,7 @@ class FacetQueryProcessor extends FacetProcessor<FacetQuery> {
       // FIXME - what needs to be done here?
     }
     response = new SimpleOrderedMap<>();
-    fillBucket(response, freq.q, null, (fcontext.flags & FacetContext.SKIP_FACET)!=0);
+    fillBucket(response, freq.q, null, (fcontext.flags & FacetContext.SKIP_FACET)!=0, fcontext.facetInfo);
   }
 
 
@@ -350,7 +350,7 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
     if (freq.getSubFacets().size() > 0) {
       DocSet subBase = intersections[slot];
       try {
-        processSubs(bucket, filters[slot], subBase, false);
+        processSubs(bucket, filters[slot], subBase, false, null);
       } finally {
         // subContext.base.decref(); // OFF-HEAP
         // subContext.base = null; // do not modify context after creation... there may be deferred execution (i.e. streaming)
@@ -367,7 +367,7 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
     }
 
     Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
-    fillBucket(bucket, rangeQ, null, false);
+    fillBucket(bucket, rangeQ, null, false, null);
 
     return bucket;
   }
@@ -227,16 +227,16 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
     String cat_s = p.get("cat_s");
     String num_d = p.get("num_d");
 
-    clients.get(0).add( sdoc("id", "01", cat_s, "A", num_d, -1) ); // A wins count tie
-    clients.get(0).add( sdoc("id", "02", cat_s, "B", num_d, 3) );
+    clients.get(0).add( sdoc("id", "01", "all_s","all", cat_s, "A", num_d, -1) ); // A wins count tie
+    clients.get(0).add( sdoc("id", "02", "all_s","all", cat_s, "B", num_d, 3) );
 
-    clients.get(1).add( sdoc("id", "11", cat_s, "B", num_d, -5) ); // B highest count
-    clients.get(1).add( sdoc("id", "12", cat_s, "B", num_d, -11) );
-    clients.get(1).add( sdoc("id", "13", cat_s, "A", num_d, 7) );
+    clients.get(1).add( sdoc("id", "11", "all_s","all", cat_s, "B", num_d, -5) ); // B highest count
+    clients.get(1).add( sdoc("id", "12", "all_s","all", cat_s, "B", num_d, -11) );
+    clients.get(1).add( sdoc("id", "13", "all_s","all", cat_s, "A", num_d, 7) );
 
-    clients.get(2).add( sdoc("id", "21", cat_s, "A", num_d, 17) ); // A highest count
-    clients.get(2).add( sdoc("id", "22", cat_s, "A", num_d, -19) );
-    clients.get(2).add( sdoc("id", "23", cat_s, "B", num_d, 11) );
+    clients.get(2).add( sdoc("id", "21", "all_s","all", cat_s, "A", num_d, 17) ); // A highest count
+    clients.get(2).add( sdoc("id", "22", "all_s","all", cat_s, "A", num_d, -19) );
+    clients.get(2).add( sdoc("id", "23", "all_s","all", cat_s, "B", num_d, 11) );
 
     client.commit();
 
@@ -291,12 +291,16 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
             "json.facet", "{" +
                 " cat0:{type:terms, field:${cat_s}, sort:'min1 asc', limit:1, overrequest:0, refine:false, facet:{ min1:'min(${num_d})'} }" +
                 ",cat1:{type:terms, field:${cat_s}, sort:'min1 asc', limit:1, overrequest:0, refine:true, facet:{ min1:'min(${num_d})'} }" +
+                ",qfacet:{type:query, q:'*:*', facet:{ cat2:{type:terms, field:${cat_s}, sort:'min1 asc', limit:1, overrequest:0, refine:true, facet:{ min1:'min(${num_d})'} } }}" + // refinement needed through a query facet
+                ",allf:{type:terms, field:all_s, facet:{ cat3:{type:terms, field:${cat_s}, sort:'min1 asc', limit:1, overrequest:0, refine:true, facet:{ min1:'min(${num_d})'} } }}" + // refinement needed through field facet
                 ",sum1:'sum(num_d)'" + // make sure that root bucket stats aren't affected by refinement
                 "}"
         )
         , "facets=={ count:8" +
-            ", cat0:{ buckets:[ {val:A,count:3, min1:-19.0} ] }" + // B wins in shard2, so we're missing the "A" count for that shar w/o refinement.
+            ", cat0:{ buckets:[ {val:A,count:3, min1:-19.0} ] }" + // B wins in shard2, so we're missing the "A" count for that shard w/o refinement.
             ", cat1:{ buckets:[ {val:A,count:4, min1:-19.0} ] }" + // with refinement, we get the right count
+            ", qfacet:{ count:8, cat2:{ buckets:[ {val:A,count:4, min1:-19.0} ] } }" + // just like the previous response, just nested under a query facet
+            ", allf:{ buckets:[ {cat3:{ buckets:[ {val:A,count:4, min1:-19.0} ] } ,count:8,val:all }] }" + // just like the previous response, just nested under a field facet
             ", sum1:2.0" +
             "}"
     );