SOLR-7452: add support for _m buckets, missing and has sub-facets in need of refinement

This commit is contained in:
yonik 2017-03-24 20:43:44 -04:00 committed by Shalin Shekhar Mangar
parent b1a574df4e
commit a42704ec4b
2 changed files with 60 additions and 9 deletions

View File

@ -545,6 +545,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
for (Object bucketVal : leaves) {
bucketList.add( refineBucket(bucketVal, false, null) );
}
for (List bucketAndFacetInfo : skip) {
assert bucketAndFacetInfo.size() == 2;
Object bucketVal = bucketAndFacetInfo.get(0);
@ -553,6 +554,16 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
bucketList.add( refineBucket(bucketVal, true, facetInfo ) );
}
// The only difference between skip and missing is the value of "skip" passed to refineBucket
for (List bucketAndFacetInfo : missing) {
assert bucketAndFacetInfo.size() == 2;
Object bucketVal = bucketAndFacetInfo.get(0);
Map<String,Object> facetInfo = (Map<String, Object>) bucketAndFacetInfo.get(1);
bucketList.add( refineBucket(bucketVal, false, facetInfo ) );
}
// If there are just a couple of leaves, and if the domain is large, then
// going by term is likely the most efficient?
// If the domain is small, or if the number of leaves is large, then doing

View File

@ -209,6 +209,17 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
"}"
);
// for testing missing _m, we need a partial facet within a partial facet
doTestRefine("{top:{type:terms, field:Afield, refine:true, limit:1, facet:{x : {type:terms, field:X, limit:1, refine:true} } } }",
"{top: {buckets:[{val:'A', count:2, x:{buckets:[{val:x1, count:5},{val:x2, count:3}]} } ] } }",
"{top: {buckets:[{val:'B', count:1, x:{buckets:[{val:x2, count:4},{val:x3, count:2}]} } ] } }",
null,
"=={top: {" +
"_m:[ ['A' , {x:{_l:[x1]}} ] ]" +
" } " +
"}"
);
}
@ -223,20 +234,21 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
client.deleteByQuery("*:*", null);
ModifiableSolrParams p = params("cat_s", "cat_s", "num_d", "num_d");
ModifiableSolrParams p = params("cat_s", "cat_s", "xy_s", "xy_s", "num_d", "num_d");
String cat_s = p.get("cat_s");
String xy_s = p.get("xy_s");
String num_d = p.get("num_d");
clients.get(0).add( sdoc("id", "01", "all_s","all", cat_s, "A", num_d, -1) ); // A wins count tie
clients.get(0).add( sdoc("id", "02", "all_s","all", cat_s, "B", num_d, 3) );
clients.get(0).add( sdoc("id", "01", "all_s","all", cat_s, "A", xy_s, "X" ,num_d, -1) ); // A wins count tie
clients.get(0).add( sdoc("id", "02", "all_s","all", cat_s, "B", xy_s, "Y", num_d, 3) );
clients.get(1).add( sdoc("id", "11", "all_s","all", cat_s, "B", num_d, -5) ); // B highest count
clients.get(1).add( sdoc("id", "12", "all_s","all", cat_s, "B", num_d, -11) );
clients.get(1).add( sdoc("id", "13", "all_s","all", cat_s, "A", num_d, 7) );
clients.get(1).add( sdoc("id", "11", "all_s","all", cat_s, "B", xy_s, "X", num_d, -5) ); // B highest count
clients.get(1).add( sdoc("id", "12", "all_s","all", cat_s, "B", xy_s, "Y", num_d, -11) );
clients.get(1).add( sdoc("id", "13", "all_s","all", cat_s, "A", xy_s, "X", num_d, 7) );
clients.get(2).add( sdoc("id", "21", "all_s","all", cat_s, "A", num_d, 17) ); // A highest count
clients.get(2).add( sdoc("id", "22", "all_s","all", cat_s, "A", num_d, -19) );
clients.get(2).add( sdoc("id", "23", "all_s","all", cat_s, "B", num_d, 11) );
clients.get(2).add( sdoc("id", "21", "all_s","all", cat_s, "A", xy_s, "X", num_d, 17) ); // A highest count
clients.get(2).add( sdoc("id", "22", "all_s","all", cat_s, "A", xy_s, "Y", num_d, -19) );
clients.get(2).add( sdoc("id", "23", "all_s","all", cat_s, "B", xy_s, "X", num_d, 11) );
client.commit();
@ -255,6 +267,7 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
);
****/
client.testJQ(params(p, "q", "*:*",
"json.facet", "{" +
"cat0:{type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:0, refine:false}" +
@ -305,6 +318,33 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
"}"
);
// test missing buckets (field facet within field facet)
client.testJQ(params(p, "q", "*:*",
"json.facet", "{" +
"ab:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true, facet:{ xy:{type:terms, field:${xy_s}, limit:1, overrequest:0, refine:true } }}" +
"}"
)
, "facets=={ count:8" +
", ab:{ buckets:[ {val:A, count:4, xy:{buckets:[ {val:X,count:3}]} }] }" + // just like the previous response, just nested under a field facet
"}"
);
// test that sibling facets and stats are included for _m buckets, but skipped for _s buckets
client.testJQ(params(p, "q", "*:*",
"json.facet", "{" +
" ab :{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true, facet:{ xy:{type:terms, field:${xy_s}, limit:1, overrequest:0, refine:true}, qq:{query:'*:*'},ww:'sum(${num_d})' }}" +
",ab2:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:false, facet:{ xy:{type:terms, field:${xy_s}, limit:1, overrequest:0, refine:true}, qq:{query:'*:*'},ww:'sum(${num_d})' }}" + // top level refine=false shouldn't matter
",allf :{type:terms, field:all_s, limit:1, overrequest:0, refine:true, facet:{cat:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true}, qq:{query:'*:*'},ww:'sum(${num_d})' }}" +
",allf2:{type:terms, field:all_s, limit:1, overrequest:0, refine:false, facet:{cat:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true}, qq:{query:'*:*'},ww:'sum(${num_d})' }}" + // top level refine=false shouldn't matter
"}"
)
, "facets=={ count:8" +
", ab:{ buckets:[ {val:A, count:4, xy:{buckets:[ {val:X,count:3}]} ,qq:{count:4}, ww:4.0 }] }" + // make sure qq and ww are included for _m buckets
", allf:{ buckets:[ {count:8, val:all, cat:{buckets:[{val:A,count:4}]} ,qq:{count:8}, ww:2.0 }] }" + // make sure qq and ww are excluded (not calculated again in another phase) for _s buckets
", ab2:{ buckets:[ {val:A, count:4, xy:{buckets:[ {val:X,count:3}]} ,qq:{count:4}, ww:4.0 }] }" + // make sure qq and ww are included for _m buckets
", allf2:{ buckets:[ {count:8, val:all, cat:{buckets:[{val:A,count:4}]} ,qq:{count:8}, ww:2.0 }] }" + // make sure qq and ww are excluded (not calculated again in another phase) for _s buckets
"}"
);
}