mirror of https://github.com/apache/lucene.git
SOLR-6187: facet.mincount ignored in range faceting using distributed search
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1623429 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6a4ea25666
commit
7fbd55f679
|
@ -179,6 +179,10 @@ Bug Fixes
|
|||
* SOLR-6467: bin/solr script should direct stdout/stderr when starting in the background
|
||||
to the solr-PORT-console.log in the logs directory instead of bin. (Timothy Potter)
|
||||
|
||||
* SOLR-6187: SOLR-6154: facet.mincount ignored in range faceting using distributed search
|
||||
NOTE: This is NOT fixed for the (deprecated) facet.date idiom; use facet.range
|
||||
instead. (Erick Erickson, Zacchio Bagnati, Ronald Matamoros, Vamsee Yalargadda)
|
||||
|
||||
Other Changes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -175,7 +175,7 @@ public class FacetComponent extends SearchComponent {
|
|||
|
||||
if (distribFieldFacetRefinements == null
|
||||
&& !pivotFacetRefinementRequestsExistForShard) {
|
||||
// nothing to refine, short circut out
|
||||
// nothing to refine, short circuit out
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -315,6 +315,8 @@ public class FacetComponent extends SearchComponent {
|
|||
|
||||
modifyRequestForFieldFacets(rb, sreq, fi);
|
||||
|
||||
modifyRequestForRangeFacets(sreq, fi);
|
||||
|
||||
modifyRequestForPivotFacets(rb, sreq, fi.pivotFacets);
|
||||
|
||||
sreq.params.remove(FacetParams.FACET_MINCOUNT);
|
||||
|
@ -327,6 +329,23 @@ public class FacetComponent extends SearchComponent {
|
|||
}
|
||||
}
|
||||
|
||||
// we must get all the range buckets back in order to have coherent lists at the end, see SOLR-6154
|
||||
private void modifyRequestForRangeFacets(ShardRequest sreq, FacetInfo fi) {
|
||||
// Collect all the range fields.
|
||||
if (sreq.params.getParams(FacetParams.FACET_RANGE) == null) {
|
||||
return;
|
||||
}
|
||||
List<String> rangeFields = new ArrayList<>();
|
||||
for (String field : sreq.params.getParams(FacetParams.FACET_RANGE)) {
|
||||
rangeFields.add(field);
|
||||
}
|
||||
|
||||
for (String field : rangeFields) {
|
||||
sreq.params.remove("f." + field + ".facet.mincount");
|
||||
sreq.params.add("f." + field + ".facet.mincount", "0");
|
||||
}
|
||||
}
|
||||
|
||||
private void modifyRequestForFieldFacets(ResponseBuilder rb, ShardRequest sreq, FacetInfo fi) {
|
||||
for (DistribFieldFacet dff : fi.facets.values()) {
|
||||
|
||||
|
@ -607,6 +626,100 @@ public class FacetComponent extends SearchComponent {
|
|||
}
|
||||
}
|
||||
}
|
||||
removeFieldFacetsUnderLimits(rb);
|
||||
removeRangeFacetsUnderLimits(rb);
|
||||
removeQueryFacetsUnderLimits(rb);
|
||||
|
||||
}
|
||||
|
||||
private void removeQueryFacetsUnderLimits(ResponseBuilder rb) {
|
||||
if (rb.stage != ResponseBuilder.STAGE_EXECUTE_QUERY) {
|
||||
return;
|
||||
}
|
||||
FacetInfo fi = rb._facetInfo;
|
||||
Map<String, QueryFacet> query_facets = fi.queryFacets;
|
||||
if (query_facets == null) {
|
||||
return;
|
||||
}
|
||||
LinkedHashMap<String, QueryFacet> newQueryFacets = new LinkedHashMap<>();
|
||||
|
||||
// The
|
||||
int minCount = rb.req.getParams().getInt(FacetParams.FACET_MINCOUNT, 0);
|
||||
boolean replace = false;
|
||||
for (Map.Entry<String, QueryFacet> ent : query_facets.entrySet()) {
|
||||
if (ent.getValue().count >= minCount) {
|
||||
newQueryFacets.put(ent.getKey(), ent.getValue());
|
||||
} else {
|
||||
log.trace("Removing facetQuery/key: " + ent.getKey() + "/" + ent.getValue().toString() + " mincount=" + minCount);
|
||||
replace = true;
|
||||
}
|
||||
}
|
||||
if (replace) {
|
||||
fi.queryFacets = newQueryFacets;
|
||||
}
|
||||
}
|
||||
|
||||
private void removeRangeFacetsUnderLimits(ResponseBuilder rb) {
|
||||
if (rb.stage != ResponseBuilder.STAGE_EXECUTE_QUERY) {
|
||||
return;
|
||||
}
|
||||
|
||||
FacetInfo fi = rb._facetInfo;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
SimpleOrderedMap<SimpleOrderedMap<Object>> facet_ranges =
|
||||
(SimpleOrderedMap<SimpleOrderedMap<Object>>)
|
||||
fi.rangeFacets;
|
||||
|
||||
if (facet_ranges == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
// go through each facet_range
|
||||
for (Map.Entry<String, SimpleOrderedMap<Object>> entry : facet_ranges) {
|
||||
boolean replace = false;
|
||||
final String field = entry.getKey();
|
||||
int minCount = rb.req.getParams().getFieldInt(field, FacetParams.FACET_MINCOUNT, 0);
|
||||
if (minCount == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
NamedList<Integer> vals
|
||||
= (NamedList<Integer>) facet_ranges.get(field).get("counts");
|
||||
NamedList newList = new NamedList();
|
||||
for (Map.Entry<String, Integer> pair : vals) {
|
||||
if (pair.getValue() >= minCount) {
|
||||
newList.add(pair.getKey(), pair.getValue());
|
||||
} else {
|
||||
log.trace("Removing facet/key: " + pair.getKey() + "/" + pair.getValue().toString() + " mincount=" + minCount);
|
||||
replace = true;
|
||||
}
|
||||
}
|
||||
if (replace) {
|
||||
vals.clear();
|
||||
vals.addAll(newList);
|
||||
}
|
||||
}
|
||||
}
|
||||
private void removeFieldFacetsUnderLimits(ResponseBuilder rb) {
|
||||
if (rb.stage != ResponseBuilder.STAGE_DONE) {
|
||||
return;
|
||||
}
|
||||
|
||||
FacetInfo fi = rb._facetInfo;
|
||||
if (fi.facets == null) {
|
||||
return;
|
||||
}
|
||||
// Do field facets
|
||||
for (Entry<String, DistribFieldFacet> ent : fi.facets.entrySet()) {
|
||||
String field = ent.getKey();
|
||||
int minCount = rb.req.getParams().getFieldInt(field, FacetParams.FACET_MINCOUNT, 0);
|
||||
if (minCount == 0) { // return them all
|
||||
continue;
|
||||
}
|
||||
ent.getValue().respectMinCount(minCount);
|
||||
}
|
||||
}
|
||||
|
||||
// The implementation below uses the first encountered shard's
|
||||
|
@ -1248,6 +1361,22 @@ public class FacetComponent extends SearchComponent {
|
|||
// TODO: could store the last term in the shard to tell if this term
|
||||
// comes before or after it. If it comes before, we could subtract 1
|
||||
}
|
||||
|
||||
public void respectMinCount(long minCount) {
|
||||
HashMap<String, ShardFacetCount> newOne = new HashMap<>();
|
||||
boolean replace = false;
|
||||
for (Map.Entry<String, ShardFacetCount> ent : counts.entrySet()) {
|
||||
if (ent.getValue().count >= minCount) {
|
||||
newOne.put(ent.getKey(), ent.getValue());
|
||||
} else {
|
||||
log.trace("Removing facet/key: " + ent.getKey() + "/" + ent.getValue().toString() + " mincount=" + minCount);
|
||||
replace = true;
|
||||
}
|
||||
}
|
||||
if (replace) {
|
||||
counts = newOne;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -28,7 +28,9 @@ import org.apache.solr.client.solrj.SolrServer;
|
|||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrServer;
|
||||
import org.apache.solr.client.solrj.response.FacetField;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.client.solrj.response.RangeFacet;
|
||||
import org.apache.solr.cloud.ChaosMonkey;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
|
@ -226,6 +228,84 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
"facet.range.gap",100,
|
||||
"f."+tlong+".facet.range.end",900);
|
||||
|
||||
// Test mincounts. We do NOT want to go through all the validateControlData stuff in the query() method here.
|
||||
// Purposely packing a _bunch_ of stuff together here to ensure that the proper level of mincount is used for
|
||||
// each
|
||||
ModifiableSolrParams minParams = new ModifiableSolrParams();
|
||||
minParams.set("q","*:*");
|
||||
minParams.set("rows", 1);
|
||||
minParams.set("facet", "true");
|
||||
minParams.set("facet.missing", "true");
|
||||
minParams.set("facet.field", i1);
|
||||
minParams.set("facet.missing", "true");
|
||||
minParams.set("facet.mincount", 2);
|
||||
|
||||
// Return a separate section of ranges over i1. Should respect global range mincount
|
||||
minParams.set("facet.range", i1);
|
||||
minParams.set("f." + i1 + ".facet.range.start", 0);
|
||||
minParams.set("f." + i1 + ".facet.range.gap", 200);
|
||||
minParams.set("f." + i1 + ".facet.range.end", 1200);
|
||||
minParams.set("f." + i1 + ".facet.mincount", 4);
|
||||
|
||||
|
||||
// Return a separate section of ranges over tlong Should respect facet.mincount
|
||||
minParams.add("facet.range", tlong);
|
||||
minParams.set("f." + tlong + ".facet.range.start", 0);
|
||||
minParams.set("f." + tlong + ".facet.range.gap", 100);
|
||||
minParams.set("f." + tlong + ".facet.range.end", 1200);
|
||||
// Repeat with a range type of date
|
||||
minParams.add("facet.range", tdate_b);
|
||||
minParams.set("f." + tdate_b + ".facet.range.start", "2009-02-01T00:00:00Z");
|
||||
minParams.set("f." + tdate_b + ".facet.range.gap", "+1YEAR");
|
||||
minParams.set("f." + tdate_b + ".facet.range.end", "2011-01-01T00:00:00Z");
|
||||
minParams.set("f." + tdate_b + ".facet.mincount", 3);
|
||||
|
||||
// Ensure that global mincount is respected for facet queries
|
||||
minParams.set("facet.query", tdate_a + ":[2010-01-01T00:00:00Z TO 2011-01-01T00:00:00Z]"); // Should return some counts
|
||||
//minParams.set("facet.query", tdate_a + ":[* TO *]"); // Should be removed
|
||||
minParams.add("facet.query", tdate_b + ":[2008-01-01T00:00:00Z TO 2009-09-01T00:00:00Z]"); // Should be removed from response
|
||||
|
||||
|
||||
setDistributedParams(minParams);
|
||||
QueryResponse minResp = queryServer(minParams);
|
||||
|
||||
ModifiableSolrParams eParams = new ModifiableSolrParams();
|
||||
eParams.set("q",tdate_b + ":[* TO *]");
|
||||
eParams.set("rows", 1000);
|
||||
eParams.set("fl", tdate_b);
|
||||
setDistributedParams(eParams);
|
||||
QueryResponse eResp = queryServer(eParams);
|
||||
|
||||
// Check that exactly the right numbers of counts came through
|
||||
assertEquals("Should be exactly 2 range facets returned after minCounts taken into account ", 3, minResp.getFacetRanges().size());
|
||||
assertEquals("Should only be 1 query facets returned after minCounts taken into account ", 1, minResp.getFacetQuery().size());
|
||||
|
||||
checkMinCountsField(minResp.getFacetField(i1).getValues(), new Object[]{null, 55L}); // Should just be the null entries for field
|
||||
|
||||
checkMinCountsRange(minResp.getFacetRanges().get(0).getCounts(), new Object[]{"0", 5L}); // range on i1
|
||||
checkMinCountsRange(minResp.getFacetRanges().get(1).getCounts(), new Object[]{"0", 3L, "100", 3L}); // range on tlong
|
||||
checkMinCountsRange(minResp.getFacetRanges().get(2).getCounts(), new Object[]{"2009-02-01T00:00:00Z", 3L}); // date (range) on tvh
|
||||
|
||||
assertTrue("Should have a facet for tdate_a", minResp.getFacetQuery().containsKey("a_n_tdt:[2010-01-01T00:00:00Z TO 2011-01-01T00:00:00Z]"));
|
||||
int qCount = minResp.getFacetQuery().get("a_n_tdt:[2010-01-01T00:00:00Z TO 2011-01-01T00:00:00Z]");
|
||||
assertEquals("tdate_a should be 5", qCount, 5);
|
||||
|
||||
// Now let's do some queries, the above is getting too complex
|
||||
minParams = new ModifiableSolrParams();
|
||||
minParams.set("q","*:*");
|
||||
minParams.set("rows", 1);
|
||||
minParams.set("facet", "true");
|
||||
minParams.set("facet.mincount", 3);
|
||||
|
||||
minParams.set("facet.query", tdate_a + ":[2010-01-01T00:00:00Z TO 2010-05-04T00:00:00Z]");
|
||||
minParams.add("facet.query", tdate_b + ":[2009-01-01T00:00:00Z TO 2010-01-01T00:00:00Z]"); // Should be removed
|
||||
setDistributedParams(minParams);
|
||||
minResp = queryServer(minParams);
|
||||
|
||||
assertEquals("Should only be 1 query facets returned after minCounts taken into account ", 1, minResp.getFacetQuery().size());
|
||||
assertTrue("Should be an entry for a_n_tdt", minResp.getFacetQuery().containsKey("a_n_tdt:[2010-01-01T00:00:00Z TO 2010-05-04T00:00:00Z]"));
|
||||
qCount = minResp.getFacetQuery().get("a_n_tdt:[2010-01-01T00:00:00Z TO 2010-05-04T00:00:00Z]");
|
||||
assertEquals("a_n_tdt should have a count of 4 ", qCount, 4);
|
||||
// variations of fl
|
||||
query("q","*:*", "fl","score","sort",i1 + " desc");
|
||||
query("q","*:*", "fl",i1 + ",score","sort",i1 + " desc");
|
||||
|
@ -455,6 +535,32 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
protected void checkMinCountsField(List<FacetField.Count> counts, Object[] pairs) {
|
||||
assertEquals("There should be exactly " + pairs.length / 2 + " returned counts. There were: " + counts.size(), counts.size(), pairs.length / 2);
|
||||
assertTrue("Variable len param must be an even number, it was: " + pairs.length, (pairs.length % 2) == 0);
|
||||
for (int pairs_idx = 0, counts_idx = 0; pairs_idx < pairs.length; pairs_idx += 2, counts_idx++) {
|
||||
String act_name = counts.get(counts_idx).getName();
|
||||
long act_count = counts.get(counts_idx).getCount();
|
||||
String exp_name = (String) pairs[pairs_idx];
|
||||
long exp_count = (long) pairs[pairs_idx + 1];
|
||||
assertEquals("Expected ordered entry " + exp_name + " at position " + counts_idx + " got " + act_name, act_name, exp_name);
|
||||
assertEquals("Expected count for entry: " + exp_name + " at position " + counts_idx + " got " + act_count, act_count, exp_count);
|
||||
}
|
||||
}
|
||||
|
||||
protected void checkMinCountsRange(List<RangeFacet.Count> counts, Object[] pairs) {
|
||||
assertEquals("There should be exactly " + pairs.length / 2 + " returned counts. There were: " + counts.size(), counts.size(), pairs.length / 2);
|
||||
assertTrue("Variable len param must be an even number, it was: " + pairs.length, (pairs.length % 2) == 0);
|
||||
for (int pairs_idx = 0, counts_idx = 0; pairs_idx < pairs.length; pairs_idx += 2, counts_idx++) {
|
||||
String act_name = counts.get(counts_idx).getValue();
|
||||
long act_count = counts.get(counts_idx).getCount();
|
||||
String exp_name = (String) pairs[pairs_idx];
|
||||
long exp_count = (long) pairs[pairs_idx + 1];
|
||||
assertEquals("Expected ordered entry " + exp_name + " at position " + counts_idx + " got " + act_name, act_name, exp_name);
|
||||
assertEquals("Expected count for entry: " + exp_name + " at position " + counts_idx + " got " + act_count, act_count, exp_count);
|
||||
}
|
||||
}
|
||||
|
||||
protected void queryPartialResults(final List<String> upShards,
|
||||
final List<SolrServer> upClients,
|
||||
Object... q) throws Exception {
|
||||
|
|
Loading…
Reference in New Issue