mirror of https://github.com/apache/lucene.git
SOLR-7452: JSON Facet API - refining for numBuckets
This commit is contained in:
parent
680f4d7fd3
commit
4f89f98f66
|
@ -290,17 +290,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
|
|||
if (!fcontext.isShard()) {
|
||||
res.add("numBuckets", numBuckets);
|
||||
} else {
|
||||
DocSet domain = fcontext.base;
|
||||
if (freq.prefix != null) {
|
||||
Query prefixFilter = sf.getType().getPrefixQuery(null, sf, freq.prefix);
|
||||
domain = fcontext.searcher.getDocSet(prefixFilter, domain);
|
||||
}
|
||||
|
||||
HLLAgg agg = new HLLAgg(freq.field);
|
||||
SlotAcc acc = agg.createSlotAcc(fcontext, domain.size(), 1);
|
||||
acc.collect(domain, 0);
|
||||
acc.key = "numBuckets";
|
||||
acc.setValues(res, 0);
|
||||
calculateNumBuckets(res);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -351,6 +341,20 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
|
|||
return res;
|
||||
}
|
||||
|
||||
private void calculateNumBuckets(SimpleOrderedMap<Object> target) throws IOException {
|
||||
DocSet domain = fcontext.base;
|
||||
if (freq.prefix != null) {
|
||||
Query prefixFilter = sf.getType().getPrefixQuery(null, sf, freq.prefix);
|
||||
domain = fcontext.searcher.getDocSet(prefixFilter, domain);
|
||||
}
|
||||
|
||||
HLLAgg agg = new HLLAgg(freq.field);
|
||||
SlotAcc acc = agg.createSlotAcc(fcontext, domain.size(), 1);
|
||||
acc.collect(domain, 0);
|
||||
acc.key = "numBuckets";
|
||||
acc.setValues(target, 0);
|
||||
}
|
||||
|
||||
// Minimal holder for one non-final int slot number. Declared static, so it keeps no
// reference to the enclosing processor instance.
// NOTE(review): its callers are not visible in this hunk — confirm how it is used.
private static class Slot {
|
||||
int slot;
|
||||
}
|
||||
|
@ -582,6 +586,10 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
|
|||
}
|
||||
}
|
||||
|
||||
if (freq.numBuckets && !skipThisFacet) {
|
||||
calculateNumBuckets(res);
|
||||
}
|
||||
|
||||
// If there are just a couple of leaves, and if the domain is large, then
|
||||
// going by term is likely the most efficient?
|
||||
// If the domain is small, or if the number of leaves is large, then doing
|
||||
|
|
|
@ -66,7 +66,7 @@ abstract class FacetFieldProcessorByArray extends FacetFieldProcessor {
|
|||
refineResult = refineFacets();
|
||||
// if we've seen this facet bucket, then refining can be done. If we haven't, we still
|
||||
// only need to continue if we need allBuckets or numBuckets info.
|
||||
if (skipThisFacet || (!freq.allBuckets && !freq.numBuckets)) return refineResult;
|
||||
if (skipThisFacet || !freq.allBuckets) return refineResult;
|
||||
}
|
||||
|
||||
String prefix = freq.prefix;
|
||||
|
|
|
@ -244,22 +244,23 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
|
|||
|
||||
client.deleteByQuery("*:*", null);
|
||||
|
||||
ModifiableSolrParams p = params("cat_s", "cat_s", "xy_s", "xy_s", "num_d", "num_d", "qw_s", "qw_s");
|
||||
ModifiableSolrParams p = params("cat_s", "cat_s", "xy_s", "xy_s", "num_d", "num_d", "qw_s", "qw_s", "er_s","er_s");
|
||||
String cat_s = p.get("cat_s");
|
||||
String xy_s = p.get("xy_s");
|
||||
String qw_s = p.get("qw_s");
|
||||
String er_s = p.get("er_s"); // this field is designed to test numBuckets refinement... the first phase will only have a single bucket returned for the top count bucket of cat_s
|
||||
String num_d = p.get("num_d");
|
||||
|
||||
clients.get(0).add( sdoc("id", "01", "all_s","all", cat_s, "A", xy_s, "X" ,num_d, -1, qw_s, "Q") ); // A wins count tie
|
||||
clients.get(0).add( sdoc("id", "02", "all_s","all", cat_s, "B", xy_s, "Y", num_d, 3 ) );
|
||||
clients.get(0).add( sdoc("id", "01", "all_s","all", cat_s, "A", xy_s, "X" ,num_d, -1, qw_s, "Q", er_s,"E") ); // A wins count tie
|
||||
clients.get(0).add( sdoc("id", "02", "all_s","all", cat_s, "B", xy_s, "Y", num_d, 3 ) );
|
||||
|
||||
clients.get(1).add( sdoc("id", "11", "all_s","all", cat_s, "B", xy_s, "X", num_d, -5 ) ); // B highest count
|
||||
clients.get(1).add( sdoc("id", "12", "all_s","all", cat_s, "B", xy_s, "Y", num_d, -11, qw_s, "W") );
|
||||
clients.get(1).add( sdoc("id", "13", "all_s","all", cat_s, "A", xy_s, "X", num_d, 7 ) );
|
||||
clients.get(1).add( sdoc("id", "11", "all_s","all", cat_s, "B", xy_s, "X", num_d, -5 , er_s,"E") ); // B highest count
|
||||
clients.get(1).add( sdoc("id", "12", "all_s","all", cat_s, "B", xy_s, "Y", num_d, -11, qw_s, "W" ) );
|
||||
clients.get(1).add( sdoc("id", "13", "all_s","all", cat_s, "A", xy_s, "X", num_d, 7 , er_s,"R") ); // "R" will only be picked up via refinement when parent facet is cat_s
|
||||
|
||||
clients.get(2).add( sdoc("id", "21", "all_s","all", cat_s, "A", xy_s, "X", num_d, 17, qw_s, "W") ); // A highest count
|
||||
clients.get(2).add( sdoc("id", "22", "all_s","all", cat_s, "A", xy_s, "Y", num_d, -19 ) );
|
||||
clients.get(2).add( sdoc("id", "23", "all_s","all", cat_s, "B", xy_s, "X", num_d, 11 ) );
|
||||
clients.get(2).add( sdoc("id", "21", "all_s","all", cat_s, "A", xy_s, "X", num_d, 17, qw_s, "W", er_s,"E") ); // A highest count
|
||||
clients.get(2).add( sdoc("id", "22", "all_s","all", cat_s, "A", xy_s, "Y", num_d, -19 ) );
|
||||
clients.get(2).add( sdoc("id", "23", "all_s","all", cat_s, "B", xy_s, "X", num_d, 11 ) );
|
||||
|
||||
client.commit();
|
||||
|
||||
|
@ -388,7 +389,6 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
|
|||
);
|
||||
|
||||
// test filling in missing "allBuckets"
|
||||
// test filling in "missing" bucket for partially refined facets
|
||||
client.testJQ(params(p, "q", "*:*",
|
||||
"json.facet", "{" +
|
||||
" cat :{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:false, allBuckets:true, facet:{ xy:{type:terms, field:${xy_s}, limit:1, overrequest:0, allBuckets:true, refine:false} } }" +
|
||||
|
@ -402,6 +402,21 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
|
|||
",cat3:{ allBuckets:{count:8}, buckets:[ {val:A, count:4, xy:{buckets:[{count:3, val:X, f:23.0}], allBuckets:{count:4, f:4.0}}}] }" +
|
||||
"}"
|
||||
);
|
||||
|
||||
// test filling in missing numBuckets
|
||||
client.testJQ(params(p, "q", "*:*",
|
||||
"json.facet", "{" +
|
||||
" cat :{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:false, numBuckets:true, facet:{ er:{type:terms, field:${er_s}, limit:1, overrequest:0, numBuckets:true, refine:false} } }" +
|
||||
", cat2:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true , numBuckets:true, facet:{ er:{type:terms, field:${er_s}, limit:1, overrequest:0, numBuckets:true, refine:true } } }" +
|
||||
"}"
|
||||
)
|
||||
, "facets=={ count:8" +
|
||||
", cat:{ numBuckets:2, buckets:[ {val:A, count:3, er:{numBuckets:1,buckets:[{count:2, val:E}] }}] }" + // the "R" bucket will not be seen w/o refinement
|
||||
",cat2:{ numBuckets:2, buckets:[ {val:A, count:4, er:{numBuckets:2,buckets:[{count:2, val:E}] }}] }" +
|
||||
"}"
|
||||
);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue